File size: 7,134 Bytes
05c9ac2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
using Unity.MLAgents.Actuators;
using Debug = UnityEngine.Debug;


namespace Unity.MLAgents.Integrations.Match3
{
    /// <summary>
    /// Actuator for a Match3 game. It translates valid moves (defined by AbstractBoard.IsMoveValid())
    /// into action masks, and applies the action to the board via AbstractBoard.MakeMove().
    /// </summary>
    public class Match3Actuator : IActuator, IBuiltInActuator
    {
        // All state is fixed at construction time; nothing below is reassigned afterwards.
        readonly AbstractBoard m_Board;
        readonly System.Random m_Random;
        readonly ActionSpec m_ActionSpec;
        readonly bool m_ForceHeuristic;
        readonly BoardSize m_MaxBoardSize;

        /// <summary>
        /// Create a Match3Actuator.
        /// </summary>
        /// <param name="board">The board whose moves are validated (IsMoveValid) and applied (MakeMove).
        /// Must not be null.</param>
        /// <param name="forceHeuristic">Whether the inference action should be ignored and the Agent's Heuristic
        /// should be called. This should only be used for generating comparison stats of the Heuristic.</param>
        /// <param name="seed">The seed used to initialize <see cref="System.Random"/>.</param>
        /// <param name="name">The value returned by the <see cref="Name"/> property.</param>
        /// <exception cref="System.ArgumentNullException">Thrown when <paramref name="board"/> is null.</exception>
        public Match3Actuator(AbstractBoard board,
                              bool forceHeuristic,
                              int seed,
                              string name)
        {
            // Fail fast with a descriptive exception instead of a NullReferenceException on the first board call.
            if (board is null)
            {
                throw new System.ArgumentNullException(nameof(board));
            }

            m_Board = board;
            m_MaxBoardSize = m_Board.GetMaxBoardSize();
            Name = name;

            m_ForceHeuristic = forceHeuristic;

            // One discrete branch with an entry for every potential move on the maximum-size board.
            var numMoves = Move.NumPotentialMoves(m_MaxBoardSize);
            m_ActionSpec = ActionSpec.MakeDiscrete(numMoves);
            m_Random = new System.Random(seed);
        }

        /// <inheritdoc/>
        public ActionSpec ActionSpec => m_ActionSpec;

        /// <inheritdoc/>
        public void OnActionReceived(ActionBuffers actions)
        {
            m_Board.CheckBoardSizes(m_MaxBoardSize);
            if (m_ForceHeuristic)
            {
                // Overwrite the received action in place with the heuristic's choice.
                Heuristic(actions);
            }
            var moveIndex = actions.DiscreteActions[0];

            Move move = Move.FromMoveIndex(moveIndex, m_MaxBoardSize);
            m_Board.MakeMove(move);
        }

        /// <inheritdoc/>
        public void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
        {
            var currentBoardSize = m_Board.GetCurrentBoardSize();
            m_Board.CheckBoardSizes(m_MaxBoardSize);
            const int branch = 0;
            bool foundValidMove = false;
            using (TimerStack.Instance.Scoped("WriteDiscreteActionMask"))
            {
                var numMoves = m_Board.NumMoves();

                // Walk the moves in index order, advancing a single Move instance with Next().
                var currentMove = Move.FromMoveIndex(0, m_MaxBoardSize);
                for (var i = 0; i < numMoves; i++)
                {
                    // Check that the move is allowed for the current boardSize (e.g. it won't move a piece out of
                    // bounds), and that it's allowed by the game itself.
                    if (currentMove.InRangeForBoard(currentBoardSize) && m_Board.IsMoveValid(currentMove))
                    {
                        foundValidMove = true;
                    }
                    else
                    {
                        actionMask.SetActionEnabled(branch, i, false);
                    }
                    currentMove.Next(m_MaxBoardSize);
                }

                if (!foundValidMove)
                {
                    // If all the moves are invalid and we mask all the actions out, this will cause an assert
                    // later on in IDiscreteActionMask. Instead, fire a callback to the user if they provided one,
                    // (or log a warning if not) and leave the last action unmasked. This isn't great, but
                    // an invalid move should be easier to handle than an exception.
                    // Read the delegate once so the null check and the invocation see the same value.
                    var noValidMovesCallback = m_Board.OnNoValidMovesAction;
                    if (noValidMovesCallback != null)
                    {
                        noValidMovesCallback();
                    }
                    else
                    {
                        Debug.LogWarning(
                            "No valid moves are available. The last action will be left unmasked, so " +
                            "an invalid move will be passed to AbstractBoard.MakeMove()."
                        );
                    }
                    actionMask.SetActionEnabled(branch, numMoves - 1, true);
                }
            }
        }

        /// <inheritdoc/>
        public string Name { get; }

        /// <inheritdoc/>
        public void ResetData()
        {
            // Intentionally empty: this actuator has nothing to reset.
        }

        /// <inheritdoc/>
        public BuiltInActuatorType GetBuiltInActuatorType()
        {
            return BuiltInActuatorType.Match3Actuator;
        }

        /// <inheritdoc/>
        public void Heuristic(in ActionBuffers actionsOut)
        {
            var discreteActions = actionsOut.DiscreteActions;
            discreteActions[0] = GreedyMove();
        }

        /// <summary>
        /// Returns a valid move that gives the highest value for EvalMovePoints(). If multiple moves have the same
        /// value, one of them will be chosen with uniform probability.
        /// </summary>
        /// <remarks>
        /// By default, EvalMovePoints() returns 1, so all valid moves are equally likely. Inherit from this class and
        /// override EvalMovePoints() to use your game's scoring as a better estimate.
        /// Any score is accepted, including negative ones: the first valid move always becomes the initial
        /// candidate, so a valid move is returned whenever the board reports at least one.
        /// </remarks>
        /// <returns>
        /// The MoveIndex of the selected move, or 0 if AbstractBoard.ValidMoves() yields no moves.
        /// </returns>
        internal int GreedyMove()
        {
            var bestMoveIndex = 0;
            var bestMovePoints = 0;
            // Number of moves seen so far that share the best score; 0 means no candidate has been seen yet.
            var numMovesAtCurrentScore = 0;

            foreach (var move in m_Board.ValidMoves())
            {
                var movePoints = EvalMovePoints(move);

                if (numMovesAtCurrentScore == 0 || movePoints > bestMovePoints)
                {
                    // First candidate, or strictly better than the current best - keep it.
                    // Not relying on a sentinel score means moves with arbitrarily low scores are still eligible.
                    bestMovePoints = movePoints;
                    bestMoveIndex = move.MoveIndex;
                    numMovesAtCurrentScore = 1;
                    continue;
                }

                if (movePoints < bestMovePoints)
                {
                    // Worse, skip
                    continue;
                }

                // Tied for best - use reservoir sampling to make sure we select from equal moves uniformly.
                // See https://en.wikipedia.org/wiki/Reservoir_sampling#Simple_algorithm
                numMovesAtCurrentScore++;
                var randVal = m_Random.Next(0, numMovesAtCurrentScore);
                if (randVal == 0)
                {
                    // Keep the new one
                    bestMoveIndex = move.MoveIndex;
                }
            }

            return bestMoveIndex;
        }

        /// <summary>
        /// Method to be overridden when evaluating how many points a specific move will generate.
        /// </summary>
        /// <param name="move">The move to evaluate.</param>
        /// <returns>The number of points the move generates. The default implementation returns 1.</returns>
        protected virtual int EvalMovePoints(Move move)
        {
            return 1;
        }
    }
}