"""Gradio app: play Connect Four against a small llama.cpp language model."""

import os
from html import escape

import gradio as gr
import numpy as np
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# The model repository is public, so no Hugging Face login is required.
model = Llama(
    model_path=hf_hub_download(
        repo_id=os.environ.get("REPO_ID", "Lyte/QuadConnect2.5-0.5B-v0.0.3b"),
        filename=os.environ.get("MODEL_FILE", "quadconnect_q8_0.gguf"),
    )
)

ROWS = 6
COLS = 7

# NOTE(review): the <reasoning>/<move> delimiters are assumed to be the
# response format the model was trained on -- confirm against the model card.
SYSTEM_PROMPT = """You are a Connect Four player. Connect Four is played on a 6x7 grid.
Given the current board state, predict the next *column* to play in.
Moves are specified by the column letter (a-g).

Respond in the following format:
<reasoning>
Explain your reasoning, considering the board state and potential future moves.
</reasoning>
<move>
The column letter (a-g) of your move.
</move>
"""


class ConnectFour:
    """Board state and rules for a 6x7 Connect Four game.

    Cells hold 0 (empty), 1 (human player) or 2 (AI). Row 0 is the top of the
    board and row 5 is the bottom.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the board and hand the first turn to the human player."""
        self.board = np.zeros((ROWS, COLS), dtype=int)
        self.current_player = 1  # 1 for player, 2 for AI
        self.game_over = False

    def make_move(self, col):
        """Drop the current player's piece into column ``col``.

        Returns:
            ``(True, row)`` with the row the piece landed in, or
            ``(False, -1)`` when the game is over, the column index is out of
            range, or the column is already full.
        """
        # Reject out-of-range columns explicitly: numpy would otherwise accept
        # negative indices and silently wrap around to another column.
        if self.game_over or not 0 <= col < COLS:
            return False, -1
        # Find the lowest empty row in the selected column.
        for row in range(ROWS - 1, -1, -1):
            if self.board[row][col] == 0:
                self.board[row][col] = self.current_player
                return True, row
        return False, -1

    def check_winner(self):
        """Return 1 or 2 if that player has four in a row, otherwise 0."""
        # (row step, column step): horizontal, vertical, and both diagonals.
        directions = ((0, 1), (1, 0), (1, 1), (-1, 1))
        for row in range(ROWS):
            for col in range(COLS):
                piece = self.board[row][col]
                if piece == 0:
                    continue
                for d_row, d_col in directions:
                    end_row = row + 3 * d_row
                    end_col = col + 3 * d_col
                    if not (0 <= end_row < ROWS and 0 <= end_col < COLS):
                        continue
                    if all(
                        self.board[row + k * d_row][col + k * d_col] == piece
                        for k in range(1, 4)
                    ):
                        return int(piece)
        return 0

    def is_full(self):
        """Return True when no empty cell is left on the board."""
        return not (self.board == 0).any()

    def board_to_string(self):
        """Describe the occupied cells as a comma-separated list.

        Each cell is written as column letter (a-g) plus row number, where
        row 1 is the bottom row, e.g. ``"d1, d2"``. Cells are listed top row
        first; the owner of each piece is not encoded.
        """
        return ", ".join(
            f"{chr(ord('a') + col)}{ROWS - row}"
            for row in range(ROWS)
            for col in range(COLS)
            if self.board[row][col] != 0
        )

    def parse_ai_move(self, move_str):
        """Convert a move such as ``'a'`` or ``'b3'`` into a column index.

        Only the first character is used.

        Raises:
            ValueError: if the move is empty or its first character is not a
                letter in the range a-g.
        """
        move = move_str.strip().lower()
        if not move:
            raise ValueError("AI returned an empty move")
        col = ord(move[0]) - ord("a")
        if not 0 <= col < COLS:
            raise ValueError(f"AI returned an invalid column: {move_str!r}")
        return col


def _extract_tag(text, tag):
    """Return the stripped text following ``<tag>`` up to ``</tag>``.

    If the closing tag is missing, everything after the opening tag is
    returned. Raises ValueError when the opening tag is absent.
    """
    _, opening, rest = text.partition(f"<{tag}>")
    if not opening:
        raise ValueError(f"AI response is missing the <{tag}> section")
    return rest.partition(f"</{tag}>")[0].strip()


def _info_box(message):
    """Wrap a short status message in the styled AI-reasoning panel."""
    return f'<div id="ai-reasoning">{message}</div>'


def _reasoning_box(reasoning, move_str):
    """Render the AI's reasoning and chosen move as HTML.

    Both values come from the model, so they are escaped before being
    embedded in markup.
    """
    return (
        '<div id="ai-reasoning">'
        '<div class="reasoning-box">'
        "<p><strong>🤔 Reasoning:</strong></p>"
        f"<p>{escape(reasoning)}</p>"
        f"<p><strong>📍 Move chosen:</strong> {escape(move_str)}</p>"
        "</div>"
        "</div>"
    )


def create_interface():
    """Build and return the Gradio Blocks UI for the game."""
    # NOTE: this single game instance is shared by every browser session
    # served by the process; per-session state would need gr.State.
    game = ConnectFour()

    css = """
    .connect4-board {
        display: grid;
        grid-template-columns: repeat(7, 1fr);
        gap: 8px;
        max-width: 600px;
        margin: 10px auto;
        background: #2196F3;
        padding: 15px;
        border-radius: 15px;
        box-shadow: 0 4px 8px rgba(0,0,0,0.2);
    }
    .connect4-cell {
        aspect-ratio: 1;
        background: white;
        border-radius: 50%;
        display: flex;
        align-items: center;
        justify-content: center;
        font-size: 2em;
    }
    .player1 { background: #f44336 !important; }
    .player2 { background: #ffc107 !important; }
    #ai-status {
        font-size: 1.2em;
        margin: 10px 0;
        color: #2196F3;
        font-weight: bold;
    }
    #ai-reasoning {
        background: #22004d;
        border-radius: 10px;
        padding: 15px;
        margin: 15px 0;
        font-family: monospace;
        min-height: 100px;
    }
    .reasoning-box {
        border-left: 4px solid #2196F3;
        padding-left: 15px;
        margin: 10px 0;
        background: #22004d;
        border-radius: 0 10px 10px 0;
    }
    #column-buttons {
        display: flex;
        justify-content: center;
        align-items: anchor-center;
        max-width: 600px;
        margin: 0 auto;
        padding: 0 15px;
    }
    #column-buttons button {
        margin: 0px 5px;
    }
    div.svelte-1nguped {
        display: block;
    }
    """

    with gr.Blocks(css=css) as interface:
        gr.Markdown("# 🎮 Connect Four vs AI")
        gr.Markdown("### This is just a quick prototype for now, and the current model was trained just for 200 steps to test the concept, the reward functions were flawed, update coming soon!")

        with gr.Row():
            with gr.Column(scale=2):
                # Status display
                status = gr.Markdown(
                    "Your turn! Click a button to drop your piece!",
                    elem_id="ai-status",
                )

                # Column buttons
                with gr.Group(elem_id="column-buttons", elem_classes=["fitter"]):
                    col_buttons = []
                    for i in range(COLS):
                        btn = gr.Button(f"⬇️ {i+1}", scale=1)
                        col_buttons.append(btn)

                # Game board
                board_display = gr.HTML(render_board(), elem_id="board-display")
                reset_btn = gr.Button("🔄 New Game", variant="primary")

            with gr.Column(scale=1):
                # AI reasoning display
                gr.Markdown("### 🤖 AI's Thoughts")
                reasoning_display = gr.HTML(
                    value=_info_box("Waiting for your move..."),
                    elem_id="ai-reasoning-container",
                )

        def handle_move(col):
            """Play the human move in ``col`` and then let the AI answer.

            Returns the three output values
            ``[board HTML, status text, reasoning HTML]``.
            """
            # gr.Number may deliver the index as a float; numpy needs an int.
            col = int(col)

            if game.game_over:
                return [
                    render_board(game.board),
                    "Game is over! Click New Game to play again.",
                    _info_box("Game Over!"),
                ]

            # Player move
            success, _ = game.make_move(col)
            if not success:
                return [
                    render_board(game.board),
                    "Column is full! Try another one.",
                    _info_box("Invalid move!"),
                ]

            # Check for winner
            if game.check_winner() == 1:
                game.game_over = True
                return [
                    render_board(game.board),
                    "🎉 You win! 🎉",
                    _info_box("Congratulations! You won!"),
                ]

            if game.is_full():
                game.game_over = True
                return [
                    render_board(game.board),
                    "It's a draw!",
                    _info_box("The board is full. It's a draw!"),
                ]

            # AI move
            board_state = game.board_to_string()
            prompt = f"Current Board: {board_state}. Make a move."

            # Get AI response
            response = model.create_chat_completion(
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.7,
                max_tokens=512,
            )

            # Extract reasoning and move; a malformed reply forfeits the game.
            try:
                ai_response = response["choices"][0]["message"]["content"]
                reasoning = _extract_tag(ai_response, "reasoning")
                move_str = _extract_tag(ai_response, "move")
                ai_col = game.parse_ai_move(move_str)
            except (AttributeError, IndexError, KeyError, TypeError, ValueError) as e:
                game.game_over = True
                return [
                    render_board(game.board),
                    "AI error occurred! You win by default!",
                    _info_box(f"Error: {escape(str(e))}"),
                ]

            # Switch to the AI only while its piece drops, so a failure can
            # never leave the human playing with the AI's colour.
            game.current_player = 2
            success, _ = game.make_move(ai_col)
            game.current_player = 1

            if not success:
                # End the game so the "win by default" message is truthful.
                game.game_over = True
                return [
                    render_board(game.board),
                    "AI made invalid move! You win by default!",
                    _info_box("AI made an invalid move!"),
                ]

            reasoning_html = _reasoning_box(reasoning, move_str)

            # Check for AI winner
            if game.check_winner() == 2:
                game.game_over = True
                return [
                    render_board(game.board),
                    "🤖 AI wins! Better luck next time!",
                    reasoning_html,
                ]

            if game.is_full():
                game.game_over = True
                return [render_board(game.board), "It's a draw!", reasoning_html]

            return [render_board(game.board), "Your turn!", reasoning_html]

        def reset_game():
            """Start a fresh game and return the initial UI values."""
            game.reset()
            return [
                render_board(),
                "Your turn! Click a button to drop your piece!",
                _info_box("New game started! Make your move..."),
            ]

        # Event handlers: a hidden Number component carries each button's
        # column index into handle_move.
        for i, btn in enumerate(col_buttons):
            btn.click(
                fn=handle_move,
                inputs=[gr.Number(value=i, visible=False)],
                outputs=[board_display, status, reasoning_display],
            )

        reset_btn.click(
            fn=reset_game,
            outputs=[board_display, status, reasoning_display],
        )

    return interface


def render_board(board=None):
    """Return the board as an HTML grid styled by the ``connect4-*`` classes.

    Args:
        board: 6x7 array of 0/1/2 cell values; an empty board is drawn when
            omitted.
    """
    if board is None:
        board = np.zeros((ROWS, COLS), dtype=int)

    cells = []
    for row in range(ROWS):
        for col in range(COLS):
            cell_class = "connect4-cell"
            content = "⚪"
            if board[row][col] == 1:
                cell_class += " player1"
                content = "🔴"
            elif board[row][col] == 2:
                cell_class += " player2"
                content = "🟡"
            cells.append(f'<div class="{cell_class}">{content}</div>')

    return '<div class="connect4-board">' + "".join(cells) + "</div>"


# Launch the interface
if __name__ == "__main__":
    interface = create_interface()
    interface.launch()