Kaushik Rajan committed on
Commit
28b6f0f
·
1 Parent(s): 7102d41

Feat: Add advanced simulation features and dynamic AI

Browse files
Files changed (1) hide show
  1. app.py +133 -18
app.py CHANGED
@@ -13,6 +13,7 @@ import numpy as np
13
  import pandas as pd
14
  import plotly.express as px
15
  import spaces
 
16
 
17
  # --- Game Configuration ---
18
  INITIAL_BUDGET = 1000
@@ -117,6 +118,10 @@ class BusinessCompetitionEnv:
117
  self.player_stats["budget"] = int(player_allocation["sales"] * player_sales_roi + player_remaining_budget)
118
  self.ai_stats["budget"] = int(ai_allocation["sales"] * ai_sales_roi + ai_remaining_budget)
119
 
 
 
 
 
120
  if self.quarter >= NUM_QUARTERS:
121
  self.game_over = True
122
 
@@ -126,7 +131,7 @@ class BusinessCompetitionEnv:
126
 
127
  # --- AI Logic ---
128
 
129
- def ai_strategy(ai_stats, player_stats):
130
  """
131
  A heuristic-based AI to simulate a strategic opponent.
132
  This mimics the kind of robust strategy that would emerge from self-play,
@@ -139,39 +144,52 @@ def ai_strategy(ai_stats, player_stats):
139
  allocation = {"rd": 0.33, "marketing": 0.34, "sales": 0.33}
140
 
141
  # --- Strategic Adjustments based on SPIRAL principles ---
 
 
 
 
 
 
142
  # 1. React to quality gap (long-term planning)
143
- if ai_stats["product_quality"] < player_stats["product_quality"] - 15:
144
  allocation["rd"] += 0.2
145
  allocation["marketing"] -= 0.1
146
  allocation["sales"] -= 0.1
147
- reasoning.append("My analysis indicates a growing product quality gap. I'm increasing R&D investment to innovate and secure a long-term competitive advantage.")
148
 
149
  # 2. React to market share loss (short-term defense)
150
- elif ai_stats["market_share"] < player_stats["market_share"] - 10:
151
  allocation["marketing"] += 0.2
152
  allocation["rd"] -= 0.1
153
  allocation["sales"] -= 0.1
154
- reasoning.append("You've recently captured significant market share. I'm launching an aggressive marketing campaign to win back customers and regain my position.")
155
 
156
  # 3. Exploit a quality advantage (pressing an advantage)
157
- if ai_stats["product_quality"] > player_stats["product_quality"] + 20:
158
  allocation["marketing"] += 0.15
159
  allocation["rd"] -= 0.15
160
- reasoning.append(f"My product quality ({ai_stats['product_quality']:.0f}) is superior. I will leverage this with a marketing push to translate product leadership into market dominance.")
161
 
162
  # 4. Manage budget (resource management)
163
- if ai_stats["budget"] < player_stats["budget"] * 0.8:
164
  allocation["sales"] += 0.15
165
  allocation["rd"] -= 0.15
166
- reasoning.append("My projections show a potential budget shortfall. I am focusing on sales to ensure strong revenue growth for future quarters.")
167
 
168
  if not reasoning:
169
- reasoning.append("I am pursuing a balanced strategy, investing across R&D, Marketing, and Sales to ensure steady, long-term growth and market presence.")
170
 
171
  # Normalize allocations
172
  total_allocation = sum(allocation.values())
173
  final_allocation = {key: int(budget * (val / total_allocation)) for key, val in allocation.items()}
174
 
 
 
 
 
 
 
 
175
  # Ensure the sum is exactly the budget
176
  diff = budget - sum(final_allocation.values())
177
  final_allocation['sales'] += diff
@@ -208,6 +226,11 @@ def create_interface():
208
  - **Multi-Turn Reasoning:** Observe the AI's rationale. It often makes decisions based on future projections (e.g., potential budget shortfalls or quality gaps), showcasing a capacity for long-term planning.
209
  - **Zero-Sum Dynamics:** The simulation is a zero-sum game for market share, creating the competitive pressure that, according to the SPIRAL paper, is essential for incentivizing robust reasoning.
210
 
 
 
 
 
 
211
  ### How to Use the App
212
 
213
  1. **Your Goal:** Achieve a higher market share than the AI by the end of 12 quarters.
@@ -255,9 +278,17 @@ def create_interface():
255
  with gr.Row():
256
  submit_btn = gr.Button("End Quarter", variant="primary")
257
  new_game_btn = gr.Button("Start New Game")
 
 
 
 
 
258
 
259
  gr.Markdown("### 🧠 AI Strategic Reasoning")
260
  ai_reasoning_box = gr.Textbox("", label="AI Decision Rationale", lines=5, interactive=False)
 
 
 
261
 
262
  def create_plots(history):
263
  df = pd.DataFrame(history)
@@ -286,7 +317,8 @@ def create_interface():
286
  gr.update(value=f"Your Budget: ${player_budget}"),
287
  gr.update(), gr.update(), gr.update(), # Raw sliders
288
  gr.update(), gr.update(), gr.update(), # Pct sliders
289
- gr.update(interactive=True) # Submit button
 
290
  )
291
 
292
  if mode == "Percentages":
@@ -307,7 +339,7 @@ def create_interface():
307
  return create_error_return(f"Error: Allocation (${rd_alloc_val + mkt_alloc_val + sales_alloc_val}) exceeds budget (${player_budget}).")
308
 
309
  player_alloc = {"rd": rd_alloc_val, "marketing": mkt_alloc_val, "sales": sales_alloc_val}
310
- ai_alloc, ai_reasoning = ai_strategy(env.ai_stats, env.player_stats)
311
  env.ai_stats["last_reasoning"] = ai_reasoning
312
 
313
  env.step(player_alloc, ai_alloc)
@@ -316,10 +348,16 @@ def create_interface():
316
  plots = create_plots(state["history"])
317
 
318
  submit_btn_update = gr.update(interactive=True)
 
319
  if state["game_over"]:
320
  winner = env.get_winner()
321
  status_text = f"Game Over! Winner: {winner}. Final market share: You ({state['player_stats']['market_share']:.1f}%) vs AI ({state['ai_stats']['market_share']:.1f}%)."
322
  submit_btn_update = gr.update(interactive=False)
 
 
 
 
 
323
  else:
324
  status_text = f"End of Quarter {state['quarter']}. Your turn."
325
 
@@ -332,7 +370,8 @@ def create_interface():
332
  gr.update(maximum=new_budget, value=int(new_budget/3)),
333
  gr.update(maximum=new_budget, value=new_budget - 2 * int(new_budget/3)),
334
  gr.update(value=33), gr.update(value=33), gr.update(value=34),
335
- submit_btn_update
 
336
  )
337
 
338
  def on_new_game():
@@ -346,7 +385,8 @@ def create_interface():
346
  gr.update(maximum=INITIAL_BUDGET, value=333),
347
  gr.update(maximum=INITIAL_BUDGET, value=334),
348
  gr.update(value=33), gr.update(value=33), gr.update(value=34),
349
- gr.update(interactive=True)
 
350
  )
351
 
352
  def update_total_raw_display(rd, mkt, sales):
@@ -358,6 +398,49 @@ def create_interface():
358
  def toggle_allocation_mode(mode):
359
  return gr.update(visible=mode == "Raw Values"), gr.update(visible=mode == "Percentages")
360
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  # --- Event Handlers ---
362
  submit_btn.click(
363
  fn=game_step_and_update,
@@ -368,7 +451,8 @@ def create_interface():
368
  player_budget_display,
369
  rd_slider_raw, mkt_slider_raw, sales_slider_raw,
370
  rd_slider_pct, mkt_slider_pct, sales_slider_pct,
371
- submit_btn
 
372
  ]
373
  )
374
 
@@ -381,7 +465,34 @@ def create_interface():
381
  player_budget_display,
382
  rd_slider_raw, mkt_slider_raw, sales_slider_raw,
383
  rd_slider_pct, mkt_slider_pct, sales_slider_pct,
384
- submit_btn
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
  ]
386
  )
387
 
@@ -392,6 +503,10 @@ def create_interface():
392
  for slider in [rd_slider_pct, mkt_slider_pct, sales_slider_pct]:
393
  slider.change(fn=update_total_pct_display, inputs=[rd_slider_pct, mkt_slider_pct, sales_slider_pct], outputs=total_allocated_pct_display)
394
 
 
 
 
 
395
  # Handler for toggling allocation modes
396
  allocation_mode_radio.change(
397
  fn=toggle_allocation_mode,
@@ -399,11 +514,11 @@ def create_interface():
399
  outputs=[raw_values_group, percentage_group]
400
  )
401
 
402
- demo.load(on_new_game, outputs=[game_env, status_box, ai_reasoning_box, plot_market_share, plot_budget, plot_quality, player_budget_display, rd_slider_raw, mkt_slider_raw, sales_slider_raw, rd_slider_pct, mkt_slider_pct, sales_slider_pct, submit_btn])
403
 
404
  return demo
405
 
406
 
407
  if __name__ == "__main__":
408
  spiral_demo = create_interface()
409
- spiral_demo.launch()
 
13
  import pandas as pd
14
  import plotly.express as px
15
  import spaces
16
+ import json
17
 
18
  # --- Game Configuration ---
19
  INITIAL_BUDGET = 1000
 
118
  self.player_stats["budget"] = int(player_allocation["sales"] * player_sales_roi + player_remaining_budget)
119
  self.ai_stats["budget"] = int(ai_allocation["sales"] * ai_sales_roi + ai_remaining_budget)
120
 
121
+ # Error Handling: Clamp budgets to >=0
122
+ self.player_stats["budget"] = max(0, self.player_stats["budget"])
123
+ self.ai_stats["budget"] = max(0, self.ai_stats["budget"])
124
+
125
  if self.quarter >= NUM_QUARTERS:
126
  self.game_over = True
127
 
 
131
 
132
  # --- AI Logic ---
133
 
134
+ def ai_strategy(ai_stats, player_stats, quarter):
135
  """
136
  A heuristic-based AI to simulate a strategic opponent.
137
  This mimics the kind of robust strategy that would emerge from self-play,
 
144
  allocation = {"rd": 0.33, "marketing": 0.34, "sales": 0.33}
145
 
146
  # --- Strategic Adjustments based on SPIRAL principles ---
147
+ # Dynamic thresholds: Tighten as game progresses (simulates adaptive curriculum)
148
+ quality_gap_threshold = 15 - (quarter // 3) # E.g., starts at 15, drops to 12 by quarter 9 (11 by quarter 12)
149
+ market_share_threshold = 10 - (quarter // 4) # Starts at 10, drops to 8 by quarter 8 (7 by quarter 12)
150
+ quality_advantage_threshold = 20 - (quarter // 3)
151
+ budget_threshold = 0.8 + (quarter / 100.0) # Slightly increases to make AI more conservative later
152
+
153
  # 1. React to quality gap (long-term planning)
154
+ if ai_stats["product_quality"] < player_stats["product_quality"] - quality_gap_threshold:
155
  allocation["rd"] += 0.2
156
  allocation["marketing"] -= 0.1
157
  allocation["sales"] -= 0.1
158
+ reasoning.append(f"Quarter {quarter}: My analysis indicates a growing product quality gap (threshold: {quality_gap_threshold}). I'm increasing R&D investment to innovate and secure a long-term competitive advantage.")
159
 
160
  # 2. React to market share loss (short-term defense)
161
+ elif ai_stats["market_share"] < player_stats["market_share"] - market_share_threshold:
162
  allocation["marketing"] += 0.2
163
  allocation["rd"] -= 0.1
164
  allocation["sales"] -= 0.1
165
+ reasoning.append(f"Quarter {quarter}: You've recently captured significant market share (threshold: {market_share_threshold}). I'm launching an aggressive marketing campaign to win back customers and regain my position.")
166
 
167
  # 3. Exploit a quality advantage (pressing an advantage)
168
+ if ai_stats["product_quality"] > player_stats["product_quality"] + quality_advantage_threshold:
169
  allocation["marketing"] += 0.15
170
  allocation["rd"] -= 0.15
171
+ reasoning.append(f"Quarter {quarter}: My product quality ({ai_stats['product_quality']:.0f}) is superior (threshold: {quality_advantage_threshold}). I will leverage this with a marketing push to translate product leadership into market dominance.")
172
 
173
  # 4. Manage budget (resource management)
174
+ if ai_stats["budget"] < player_stats["budget"] * budget_threshold:
175
  allocation["sales"] += 0.15
176
  allocation["rd"] -= 0.15
177
+ reasoning.append(f"Quarter {quarter}: My projections show a potential budget shortfall (threshold: {budget_threshold:.2f}). I am focusing on sales to ensure strong revenue growth for future quarters.")
178
 
179
  if not reasoning:
180
+ reasoning.append(f"Quarter {quarter}: I am pursuing a balanced strategy, investing across R&D, Marketing, and Sales to ensure steady, long-term growth and market presence.")
181
 
182
  # Normalize allocations
183
  total_allocation = sum(allocation.values())
184
  final_allocation = {key: int(budget * (val / total_allocation)) for key, val in allocation.items()}
185
 
186
+ # Simulate RAE-inspired stability: Average with a "role-reversed" allocation
187
+ role_reversed_alloc = {"rd": allocation["rd"], "marketing": allocation["sales"], "sales": allocation["marketing"]} # Simple swap for variance reduction
188
+ reversed_total = sum(role_reversed_alloc.values())
189
+ reversed_final = {key: int(budget * (val / reversed_total)) for key, val in role_reversed_alloc.items()}
190
+ for key in final_allocation:
191
+ final_allocation[key] = int((final_allocation[key] + reversed_final[key]) / 2)
192
+
193
  # Ensure the sum is exactly the budget
194
  diff = budget - sum(final_allocation.values())
195
  final_allocation['sales'] += diff
 
226
  - **Multi-Turn Reasoning:** Observe the AI's rationale. It often makes decisions based on future projections (e.g., potential budget shortfalls or quality gaps), showcasing a capacity for long-term planning.
227
  - **Zero-Sum Dynamics:** The simulation is a zero-sum game for market share, creating the competitive pressure that, according to the SPIRAL paper, is essential for incentivizing robust reasoning.
228
 
229
+ ### Key Links to SPIRAL Paper Takeaways
230
+ - **Transferable Reasoning:** Your R&D investments build long-term planning skills, transferable to real-world logic problems (Takeaway 2).
231
+ - **Diverse Skills:** Marketing encourages probabilistic thinking (like Poker), while Sales focuses on resource foresight (Takeaway 4).
232
+ - **Synergy from Multi-Game Training:** Combining these creates a well-rounded strategy, better than focusing on one area (Takeaway 5).
233
+
234
  ### How to Use the App
235
 
236
  1. **Your Goal:** Achieve a higher market share than the AI by the end of 12 quarters.
 
278
  with gr.Row():
279
  submit_btn = gr.Button("End Quarter", variant="primary")
280
  new_game_btn = gr.Button("Start New Game")
281
+ ai_vs_ai_btn = gr.Button("Simulate AI vs AI")
282
+
283
+ with gr.Row():
284
+ save_btn = gr.Button("Save Game")
285
+ load_file = gr.File(label="Load Game JSON")
286
 
287
  gr.Markdown("### 🧠 AI Strategic Reasoning")
288
  ai_reasoning_box = gr.Textbox("", label="AI Decision Rationale", lines=5, interactive=False)
289
+
290
+ gr.Markdown("### 📝 Post-Game Analysis")
291
+ analysis_box = gr.Textbox("", label="Strategy Insights", lines=3, interactive=False)
292
 
293
  def create_plots(history):
294
  df = pd.DataFrame(history)
 
317
  gr.update(value=f"Your Budget: ${player_budget}"),
318
  gr.update(), gr.update(), gr.update(), # Raw sliders
319
  gr.update(), gr.update(), gr.update(), # Pct sliders
320
+ gr.update(interactive=True), # Submit button
321
+ gr.update() # Analysis box
322
  )
323
 
324
  if mode == "Percentages":
 
339
  return create_error_return(f"Error: Allocation (${rd_alloc_val + mkt_alloc_val + sales_alloc_val}) exceeds budget (${player_budget}).")
340
 
341
  player_alloc = {"rd": rd_alloc_val, "marketing": mkt_alloc_val, "sales": sales_alloc_val}
342
+ ai_alloc, ai_reasoning = ai_strategy(env.ai_stats, env.player_stats, env.quarter + 1) # Pass next quarter
343
  env.ai_stats["last_reasoning"] = ai_reasoning
344
 
345
  env.step(player_alloc, ai_alloc)
 
348
  plots = create_plots(state["history"])
349
 
350
  submit_btn_update = gr.update(interactive=True)
351
+ analysis_text = ""
352
  if state["game_over"]:
353
  winner = env.get_winner()
354
  status_text = f"Game Over! Winner: {winner}. Final market share: You ({state['player_stats']['market_share']:.1f}%) vs AI ({state['ai_stats']['market_share']:.1f}%)."
355
  submit_btn_update = gr.update(interactive=False)
356
+ # Post-game analysis
357
+ final_history = state["history"][-1]
358
+ rd_invest = final_history["Player Product Quality"] - INITIAL_PRODUCT_QUALITY
359
+ sales_focus = final_history["Player Budget"] > INITIAL_BUDGET
360
+ analysis_text = f"Post-Game Analysis: Your strategy showed synergy by balancing skills—e.g., high R&D (quality gain: {rd_invest}) with Sales (budget growth: {sales_focus}) led to transferable reasoning advantages."
361
  else:
362
  status_text = f"End of Quarter {state['quarter']}. Your turn."
363
 
 
370
  gr.update(maximum=new_budget, value=int(new_budget/3)),
371
  gr.update(maximum=new_budget, value=new_budget - 2 * int(new_budget/3)),
372
  gr.update(value=33), gr.update(value=33), gr.update(value=34),
373
+ submit_btn_update,
374
+ analysis_text
375
  )
376
 
377
  def on_new_game():
 
385
  gr.update(maximum=INITIAL_BUDGET, value=333),
386
  gr.update(maximum=INITIAL_BUDGET, value=334),
387
  gr.update(value=33), gr.update(value=33), gr.update(value=34),
388
+ gr.update(interactive=True),
389
+ ""
390
  )
391
 
392
  def update_total_raw_display(rd, mkt, sales):
 
398
  def toggle_allocation_mode(mode):
399
  return gr.update(visible=mode == "Raw Values"), gr.update(visible=mode == "Percentages")
400
 
401
def adjust_pct_sliders(rd, mkt):
    """Return a Gradio update that sets the Sales percentage to the remainder.

    Args:
        rd: Current value of the R&D percentage slider.
        mkt: Current value of the Marketing percentage slider.

    Returns:
        A ``gr.update`` whose value is ``100 - rd - mkt``, floored at 0.
    """
    # When R&D + Marketing exceed 100 the remainder is negative; clamp it so
    # the Sales slider is never handed a negative percentage.
    return gr.update(value=max(0, 100 - rd - mkt))
403
+
404
def simulate_ai_vs_ai():
    """Run a complete game in which both seats are played by ``ai_strategy``.

    A fresh ``BusinessCompetitionEnv`` is stepped once per quarter for
    ``NUM_QUARTERS`` quarters. Each quarter the heuristic is called twice with
    the two stat dictionaries swapped, so the "player" seat is an AI copy.

    Returns:
        A flat tuple: the per-quarter reasoning log joined by blank lines,
        the unpacked result of ``create_plots`` on the final history
        (presumably the three figures - confirm against ``create_plots``),
        a completion status string, and an analysis string.
    """
    sim_env = BusinessCompetitionEnv()
    reasoning_log = []

    for quarter in range(1, NUM_QUARTERS + 1):
        # Seat one sees its own stats first and the opponent's second.
        first_alloc, first_reasoning = ai_strategy(
            sim_env.player_stats, sim_env.ai_stats, quarter
        )
        # Seat two is the regular AI, called with the arguments mirrored.
        second_alloc, second_reasoning = ai_strategy(
            sim_env.ai_stats, sim_env.player_stats, quarter
        )
        sim_env.step(first_alloc, second_alloc)
        reasoning_log.append(
            f"Quarter {quarter}: AI1 Reasoning: {first_reasoning} | AI2 Reasoning: {second_reasoning}"
        )

    final_state = sim_env.get_state()
    winner = sim_env.get_winner()
    figures = create_plots(final_state["history"])

    reasoning_text = "\n\n".join(reasoning_log)
    status_text = f"AI vs AI Simulation Complete! Winner: {winner}"
    analysis_text = f"AI vs AI Simulation: Synergy in self-play led to balanced strategies. Winner: {winner}."
    return (reasoning_text, *figures, status_text, analysis_text)
417
+
418
def save_game(env):
    """Write the game's history to a JSON file and return that file's path.

    The return value feeds a ``gr.File`` output, which expects a file path
    rather than raw JSON text, so the history is written to a temporary
    ``.json`` file instead of being returned as a string.

    Args:
        env: Game environment exposing ``get_state()``; the returned mapping
            must contain a JSON-serializable ``"history"`` entry.

    Returns:
        Path of the JSON file that was written.
    """
    import tempfile  # local import: only this handler needs it

    history = env.get_state()["history"]
    # delete=False keeps the file on disk after the handle closes so it can
    # still be served for download.
    with tempfile.NamedTemporaryFile(
        "w",
        suffix=".json",
        prefix="spiral_game_",
        delete=False,
        encoding="utf-8",
    ) as handle:
        json.dump(history, handle)
    return handle.name
420
+
421
def load_game(file):
    """Rebuild a game from an uploaded history JSON file.

    The last history record supplies the quarter and both sides' budget,
    market share and product quality.

    Args:
        file: Value delivered by the ``gr.File`` input: ``None`` when the
            upload is cleared, otherwise an object with a ``.name`` path
            (a plain path string is accepted as well).

    Returns:
        A 15-item tuple, one item per output wired to ``load_file.change``:
        game state, status text, AI reasoning text, the unpacked result of
        ``create_plots``, the budget display update, six slider updates,
        the submit-button update and the analysis text. On failure every
        item except the status text is a no-op ``gr.update()``.
    """

    def _no_change(message):
        # The handler must return one value per wired output. No-op updates
        # leave the running game untouched instead of replacing it with None.
        # NOTE(review): relies on Gradio ignoring gr.update() for the State
        # output - confirm against the installed Gradio version.
        return (gr.update(), message, *([gr.update()] * 13))

    if file is None:
        return _no_change("No file uploaded.")

    path = getattr(file, "name", file)
    try:
        with open(path, "r", encoding="utf-8") as f:
            history = json.load(f)
        latest = history[-1]
        quarter = latest["Quarter"]
        player_stats = {
            "budget": latest["Player Budget"],
            "market_share": latest["Player Market Share"],
            "product_quality": latest["Player Product Quality"],
        }
        ai_stats = {
            "budget": latest["AI Budget"],
            "market_share": latest["AI Market Share"],
            "product_quality": latest["AI Product Quality"],
        }
    except (OSError, ValueError, KeyError, IndexError, TypeError) as exc:
        # Unreadable file, malformed JSON, empty history or a record missing
        # the expected columns: report it rather than crash the handler.
        return _no_change(f"Could not load game: {exc}")

    env = BusinessCompetitionEnv()
    env.history = history
    env.quarter = quarter
    env.player_stats = player_stats
    env.ai_stats = ai_stats
    env.game_over = env.quarter >= NUM_QUARTERS
    plots = create_plots(env.history)
    status = f"Loaded game at Quarter {env.quarter}. Your move." if not env.game_over else "Loaded completed game."
    return (
        env,
        status,
        "",
        *plots,
        gr.update(value=f"Your Budget: ${env.player_stats['budget']}"),
        *([gr.update()] * 6),
        gr.update(interactive=not env.game_over),
        "",
    )
443
+
444
  # --- Event Handlers ---
445
  submit_btn.click(
446
  fn=game_step_and_update,
 
451
  player_budget_display,
452
  rd_slider_raw, mkt_slider_raw, sales_slider_raw,
453
  rd_slider_pct, mkt_slider_pct, sales_slider_pct,
454
+ submit_btn,
455
+ analysis_box
456
  ]
457
  )
458
 
 
465
  player_budget_display,
466
  rd_slider_raw, mkt_slider_raw, sales_slider_raw,
467
  rd_slider_pct, mkt_slider_pct, sales_slider_pct,
468
+ submit_btn,
469
+ analysis_box
470
+ ]
471
+ )
472
+
473
+ ai_vs_ai_btn.click(
474
+ fn=simulate_ai_vs_ai,
475
+ inputs=[],
476
+ outputs=[ai_reasoning_box, plot_market_share, plot_budget, plot_quality, status_box, analysis_box]
477
+ )
478
+
479
+ save_btn.click(
480
+ fn=save_game,
481
+ inputs=game_env,
482
+ outputs=gr.File(label="Download Game JSON")
483
+ )
484
+
485
+ load_file.change(
486
+ fn=load_game,
487
+ inputs=load_file,
488
+ outputs=[
489
+ game_env, status_box, ai_reasoning_box,
490
+ plot_market_share, plot_budget, plot_quality,
491
+ player_budget_display,
492
+ rd_slider_raw, mkt_slider_raw, sales_slider_raw,
493
+ rd_slider_pct, mkt_slider_pct, sales_slider_pct,
494
+ submit_btn,
495
+ analysis_box
496
  ]
497
  )
498
 
 
503
  for slider in [rd_slider_pct, mkt_slider_pct, sales_slider_pct]:
504
  slider.change(fn=update_total_pct_display, inputs=[rd_slider_pct, mkt_slider_pct, sales_slider_pct], outputs=total_allocated_pct_display)
505
 
506
+ # Auto-adjust percentage sliders
507
+ rd_slider_pct.change(fn=adjust_pct_sliders, inputs=[rd_slider_pct, mkt_slider_pct], outputs=sales_slider_pct)
508
+ mkt_slider_pct.change(fn=adjust_pct_sliders, inputs=[rd_slider_pct, mkt_slider_pct], outputs=sales_slider_pct)
509
+
510
  # Handler for toggling allocation modes
511
  allocation_mode_radio.change(
512
  fn=toggle_allocation_mode,
 
514
  outputs=[raw_values_group, percentage_group]
515
  )
516
 
517
+ demo.load(on_new_game, outputs=[game_env, status_box, ai_reasoning_box, plot_market_share, plot_budget, plot_quality, player_budget_display, rd_slider_raw, mkt_slider_raw, sales_slider_raw, rd_slider_pct, mkt_slider_pct, sales_slider_pct, submit_btn, analysis_box])
518
 
519
  return demo
520
 
521
 
522
  if __name__ == "__main__":
523
  spiral_demo = create_interface()
524
+ spiral_demo.launch()