Kaushik Rajan committed on
Commit
9671560
·
1 Parent(s): 842d62b

Fix: Add @spaces.GPU decorator to resolve Hugging Face runtime error

Browse files
Files changed (1) hide show
  1. app.py +63 -97
app.py CHANGED
@@ -9,6 +9,7 @@ This simplified demo shows how strategic reasoning emerges from self-play in zer
9
  import gradio as gr
10
  import numpy as np
11
  import random
 
12
 
13
 
14
  class TicTacToeEnv:
@@ -110,7 +111,7 @@ def check_winner(board):
110
  return board[0, 0]
111
 
112
  if abs(np.fliplr(board).diagonal().sum()) == 3:
113
- return board[0, 2]
114
 
115
  return None
116
 
@@ -259,8 +260,15 @@ def create_interface():
259
 
260
  ttt_stats = gr.State({'wins': 0, 'losses': 0, 'draws': 0})
261
 
 
262
  def play_tictactoe(position, stats):
263
- """Play a TicTacToe move and demonstrate AI reasoning."""
 
 
 
 
 
 
264
  if tictactoe_env.game_over:
265
  yield *update_board_buttons(), "Game is over! Click 'New Game' to start again.", "", stats
266
  return
@@ -294,7 +302,7 @@ def create_interface():
294
  yield *update_board_buttons(), "Game is a draw!", "", stats
295
  return
296
  ai_action = random.choice(valid_actions)
297
-
298
  # Generate reasoning explanation
299
  reasoning = generate_reasoning(tictactoe_env.board.copy(), position, ai_action)
300
 
@@ -309,7 +317,7 @@ def create_interface():
309
  yield *update_board_buttons(), f"Game Over! {winner} won! AI played position {ai_action}.", reasoning, stats
310
  else:
311
  yield *update_board_buttons(), f"AI chose position {ai_action}. Your turn!", reasoning, stats
312
-
313
  except Exception as e:
314
  yield *update_board_buttons(), f"Error: {str(e)}", "", stats
315
 
@@ -318,114 +326,72 @@ def create_interface():
318
  tictactoe_env.reset()
319
  return *update_board_buttons(), "New game started! You are ❌ (X). Click a square to demonstrate strategic reasoning.", "The AI will explain its strategic decision-making process...", stats
320
 
321
- # Initialize the board
322
- tictactoe_env.reset()
323
-
324
- # Game interface
325
  with gr.Row():
326
- gr.Markdown("### Strategic TicTacToe")
327
- gr.Markdown("") # spacer
328
- ttt_reset_btn = gr.Button("πŸ”„ New Game", variant="secondary", size="sm")
329
-
330
- gr.Markdown("**You are ❌ (X)** - The AI uses minimax tree search to demonstrate strategic reasoning")
331
-
332
- # Game board
333
- with gr.Column(elem_classes=["ttt-board"]):
334
- board_buttons = []
335
- for i in range(3):
336
- with gr.Row(elem_classes=["ttt-row"]):
337
- for j in range(3):
338
- pos = i * 3 + j
339
- button = gr.Button("", elem_id=f"ttt-cell-{pos}", size="lg", value="")
340
- board_buttons.append(button)
 
 
 
341
 
342
- # Stats display
343
- with gr.Row():
344
- ttt_stats_display = gr.Markdown(value="**Wins: 0 | Losses: 0 | Draws: 0**", elem_classes=["ttt-stats"])
 
 
 
 
 
 
 
345
 
346
- # Game status and AI reasoning
347
- ttt_message = gr.Textbox(
348
- label="🎯 Game Status",
349
- value="Click a square to start! Watch how the AI reasons strategically.",
350
- lines=2,
351
- interactive=False
352
- )
353
 
354
- ttt_reasoning = gr.Textbox(
355
- label="🧠 AI Strategic Reasoning",
356
- value="The AI will explain its strategic decision-making process here, demonstrating how reasoning emerges from self-play training in zero-sum games.",
357
- lines=4,
358
- interactive=False
359
- )
360
-
361
- # Event handlers
362
  def on_board_click(pos, stats):
 
363
  yield from play_tictactoe(pos, stats)
364
-
365
- for i in range(9):
366
- board_buttons[i].click(
367
- fn=on_board_click,
368
- inputs=[gr.State(i), ttt_stats],
369
- outputs=[*board_buttons, ttt_message, ttt_reasoning, ttt_stats]
370
- )
371
 
372
- ttt_reset_btn.click(
373
- fn=reset_tictactoe,
 
 
 
 
 
 
 
 
 
374
  inputs=[ttt_stats],
375
- outputs=[*board_buttons, ttt_message, ttt_reasoning, ttt_stats]
376
  )
377
 
378
- # Update stats display
379
  ttt_stats.change(
380
- fn=lambda s: f"**Wins: {s['wins']} | Losses: {s['losses']} | Draws: {s['draws']}**",
381
  inputs=ttt_stats,
382
- outputs=ttt_stats_display
383
- )
384
-
385
- # Initialize board display on load
386
- demo.load(
387
- fn=lambda stats: (*update_board_buttons(), "Click a square to start! Watch how the AI reasons strategically.", "The AI will explain its strategic decision-making process here, demonstrating how reasoning emerges from self-play training in zero-sum games.", stats),
388
- inputs=[ttt_stats],
389
- outputs=[*board_buttons, ttt_message, ttt_reasoning, ttt_stats]
390
  )
391
 
392
- # Key concepts section
393
- gr.Markdown("---")
394
- gr.Markdown("## 🧠 Key SPIRAL Concepts Demonstrated")
395
-
396
- with gr.Row():
397
- with gr.Column():
398
- gr.Markdown("""
399
- **🎯 Strategic Reasoning**
400
- - AI uses minimax tree search
401
- - Evaluates all possible future moves
402
- - Chooses optimal strategic actions
403
- """)
404
-
405
- with gr.Column():
406
- gr.Markdown("""
407
- **🔄 Self-Play Learning**
408
- - Strategic patterns emerge from competition
409
- - Zero-sum games incentivize reasoning
410
- - Multi-agent interactions develop intelligence
411
- """)
412
-
413
- gr.Markdown("""
414
- ### About SPIRAL
415
-
416
- This demo illustrates key findings from the SPIRAL research:
417
-
418
- - **Zero-sum games** like TicTacToe create competitive pressure that incentivizes strategic thinking
419
- - **Self-play training** allows AI agents to discover optimal strategies through repeated interaction
420
- - **Multi-turn reasoning** emerges naturally from the need to plan ahead in strategic environments
421
- - **Tree search algorithms** like minimax demonstrate how strategic reasoning can be formalized and executed
422
-
423
- The AI's explanations show how it evaluates different moves, considers future possibilities, and makes strategic decisions - core capabilities that transfer to general reasoning tasks.
424
- """)
425
-
426
  return demo
427
 
428
 
429
  if __name__ == "__main__":
430
- demo = create_interface()
431
- demo.launch()
 
 
9
  import gradio as gr
10
  import numpy as np
11
  import random
12
+ import spaces
13
 
14
 
15
  class TicTacToeEnv:
 
111
  return board[0, 0]
112
 
113
  if abs(np.fliplr(board).diagonal().sum()) == 3:
114
+ return self.board[0, 2]
115
 
116
  return None
117
 
 
260
 
261
  ttt_stats = gr.State({'wins': 0, 'losses': 0, 'draws': 0})
262
 
263
+ @spaces.GPU
264
  def play_tictactoe(position, stats):
265
+ """
266
+ Main game loop for TicTacToe. Handles human move, AI response, and updates state.
267
+ This function is decorated with @spaces.GPU to satisfy the Hugging Face Spaces
268
+ runtime, even though the TicTacToe logic does not require GPU acceleration.
269
+ The underlying issue is a mismatch between the selected GPU hardware and the
270
+ CPU-bound nature of the application.
271
+ """
272
  if tictactoe_env.game_over:
273
  yield *update_board_buttons(), "Game is over! Click 'New Game' to start again.", "", stats
274
  return
 
302
  yield *update_board_buttons(), "Game is a draw!", "", stats
303
  return
304
  ai_action = random.choice(valid_actions)
305
+
306
  # Generate reasoning explanation
307
  reasoning = generate_reasoning(tictactoe_env.board.copy(), position, ai_action)
308
 
 
317
  yield *update_board_buttons(), f"Game Over! {winner} won! AI played position {ai_action}.", reasoning, stats
318
  else:
319
  yield *update_board_buttons(), f"AI chose position {ai_action}. Your turn!", reasoning, stats
320
+
321
  except Exception as e:
322
  yield *update_board_buttons(), f"Error: {str(e)}", "", stats
323
 
 
326
  tictactoe_env.reset()
327
  return *update_board_buttons(), "New game started! You are ❌ (X). Click a square to demonstrate strategic reasoning.", "The AI will explain its strategic decision-making process...", stats
328
 
 
 
 
 
329
  with gr.Row():
330
+ with gr.Column(scale=2):
331
+ status_box = gr.Textbox("Welcome to SPIRAL TicTacToe! You are ❌ (X). Click a square to begin.", label="Game Status", interactive=False)
332
+ reasoning_box = gr.Textbox("The AI will explain its strategic moves here.", label="AI Reasoning", interactive=False, lines=4)
333
+
334
+ with gr.Column(elem_classes=["ttt-board"]):
335
+ board_buttons = []
336
+ for i in range(3):
337
+ with gr.Row():
338
+ for j in range(3):
339
+ pos = i * 3 + j
340
+ btn = gr.Button("", elem_id=f"ttt-btn-{pos}")
341
+ board_buttons.append(btn)
342
+
343
+ with gr.Row():
344
+ new_game_btn = gr.Button("New Game", variant="primary")
345
+
346
+ # Hidden state for passing button clicks
347
+ clicked_pos = gr.Textbox(visible=False)
348
 
349
+ with gr.Column(scale=1):
350
+ gr.Markdown("### πŸ“Š Game Stats")
351
+ stats_display = gr.Markdown("Wins: 0 | Losses: 0 | Draws: 0", elem_classes=["ttt-stats"])
352
+
353
+ def update_stats_display(stats):
354
+ return f"Wins: {stats['wins']} | Losses: {stats['losses']} | Draws: {stats['draws']}"
355
+
356
+ gr.Markdown("""
357
+ ### 🤔 What is SPIRAL?
358
+ SPIRAL stands for **Self-Play in Reinforcement Learning**. This demo illustrates a core concept from the paper: by playing against itself millions of times, an AI can learn complex, human-like strategic reasoning without being explicitly programmed with rules like "take the center square."
359
 
360
+ The AI here uses a simple **minimax** algorithm, a classic game theory tree search method, to find the optimal move. This serves as a stand-in for the more complex neural networks used in the actual SPIRAL research.
361
+ """)
362
+
363
+ # --- Event Handlers ---
 
 
 
364
 
 
 
 
 
 
 
 
 
365
  def on_board_click(pos, stats):
366
+ """Handler for board button clicks. Propagates to main game logic."""
367
  yield from play_tictactoe(pos, stats)
 
 
 
 
 
 
 
368
 
369
+ # Link button clicks to the handler
370
+ for i, btn in enumerate(board_buttons):
371
+ btn.click(
372
+ fn=on_board_click,
373
+ inputs=[gr.Textbox(str(i), visible=False), ttt_stats],
374
+ outputs=[*board_buttons, status_box, reasoning_box, ttt_stats]
375
+ )
376
+
377
+ # Link new game button to reset function
378
+ new_game_btn.click(
379
+ fn=reset_tictactoe,
380
  inputs=[ttt_stats],
381
+ outputs=[*board_buttons, status_box, reasoning_box, ttt_stats]
382
  )
383
 
384
+ # Update stats display when ttt_stats changes
385
  ttt_stats.change(
386
+ fn=update_stats_display,
387
  inputs=ttt_stats,
388
+ outputs=stats_display
 
 
 
 
 
 
 
389
  )
390
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
  return demo
392
 
393
 
394
  if __name__ == "__main__":
395
+ # Create and launch the Gradio interface
396
+ spiral_demo = create_interface()
397
+ spiral_demo.launch()