File size: 6,535 Bytes
05c9ac2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
using System;
using System.Collections.Generic;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;

namespace Unity.MLAgents
{
    /// <summary>
    /// Initialization parameters describing the Unity side of the communication.
    /// An instance is passed as the <c>initParameters</c> argument of
    /// <see cref="ICommunicator.Initialize"/>.
    /// </summary>
    public struct CommunicatorInitParameters
    {
        /// <summary>
        /// Port to listen for connections on.
        /// </summary>
        public int port;

        /// <summary>
        /// The name of the environment.
        /// </summary>
        public string name;

        /// <summary>
        /// The version of the Unity SDK.
        /// </summary>
        public string unityPackageVersion;

        /// <summary>
        /// The version of the communication API.
        /// </summary>
        public string unityCommunicationVersion;

        /// <summary>
        /// The RL capabilities of the C# codebase.
        /// </summary>
        public UnityRLCapabilities CSharpCapabilities;
    }
    /// <summary>
    /// Initialization parameters received from the external (python) process.
    /// An instance is returned through the <c>initParametersOut</c> argument of
    /// <see cref="ICommunicator.Initialize"/>.
    /// </summary>
    public struct UnityRLInitParameters
    {
        /// <summary>
        /// A random number generator (RNG) seed sent from the python process to Unity.
        /// </summary>
        public int seed;

        /// <summary>
        /// The number of areas to replicate if Training Area Replication is used in the scene.
        /// </summary>
        public int numAreas;

        /// <summary>
        /// The library version of the python process.
        /// </summary>
        public string pythonLibraryVersion;

        /// <summary>
        /// The version of the communication API that python is using.
        /// </summary>
        public string pythonCommunicationVersion;

        /// <summary>
        /// The RL capabilities of the Trainer codebase.
        /// </summary>
        public UnityRLCapabilities TrainerCapabilities;
    }
    /// <summary>
    /// Parameters sent back from python to Unity. Instances are delivered to
    /// <see cref="RLInputReceivedHandler"/> callbacks.
    /// </summary>
    internal struct UnityRLInputParameters
    {
        /// <summary>
        /// Boolean sent back from python to indicate whether or not training is happening.
        /// </summary>
        public bool isTraining;
    }

    /// <summary>
    /// Delegate for handling quit events sent back from the communicator.
    /// This is the handler type of <see cref="ICommunicator.QuitCommandReceived"/>.
    /// </summary>
    public delegate void QuitCommandHandler();

    /// <summary>
    /// Delegate for handling reset parameter updates sent from the communicator.
    /// This is the handler type of <see cref="ICommunicator.ResetCommandReceived"/>.
    /// </summary>
    public delegate void ResetCommandHandler();

    /// <summary>
    /// Delegate to handle <see cref="UnityRLInputParameters"/> updates from the communicator.
    /// </summary>
    /// <param name="inputParams">The <see cref="UnityRLInputParameters"/> received from the communicator.</param>
    internal delegate void RLInputReceivedHandler(UnityRLInputParameters inputParams);

    /// <summary>
    /// This is the interface of the Communicators.
    /// It does not need to be modified nor implemented to create a Unity environment.
    /// </summary>
    /// <remarks>
    /// <para>
    /// When the Unity Communicator is initialized, it will wait for the External Communicator
    /// to be initialized as well. The two communicators will then exchange their first messages,
    /// which will usually contain information for initialization (information that does not need
    /// to be resent at each new exchange).
    /// </para>
    /// <para>
    /// By convention, a Unity input goes from External to Unity and a Unity output goes from
    /// Unity to External. Inputs and outputs are relative to Unity.
    /// </para>
    /// <para>
    /// By convention, when the Unity Communicator and External Communicator call exchange, the
    /// exchange is NOT simultaneous but sequential. This means that when a side of the
    /// communication calls exchange, the other will receive the result of its previous
    /// exchange call.
    /// This is what happens when A calls exchange a single time:
    /// A sends data_1 to B -> B receives data_1 -> B generates and sends data_2 -> A receives data_2.
    /// When A calls exchange, it sends data_1 and receives data_2.
    /// </para>
    /// <para>
    /// Since the messages are sent back and forth with exchange and simultaneously when calling
    /// initialize, External sends two messages at initialization.
    /// </para>
    /// <para>
    /// The structure of the messages is as follows:
    /// </para>
    /// <code>
    /// UnityMessage
    /// ...Header
    /// ...UnityOutput
    /// ......UnityRLOutput
    /// ......UnityRLInitializationOutput
    /// ...UnityInput
    /// ......UnityRLInput
    /// ......UnityRLInitializationInput
    /// </code>
    /// <para>
    /// UnityOutput and UnityInput can be extended to provide functionalities beyond RL.
    /// UnityRLOutput and UnityRLInput can be extended to provide new RL functionalities.
    /// </para>
    /// </remarks>
    public interface ICommunicator : IDisposable
    {
        /// <summary>
        /// Raised when a quit command was received by the communicator.
        /// </summary>
        event QuitCommandHandler QuitCommandReceived;

        /// <summary>
        /// Raised when a reset command is sent back from the communicator.
        /// </summary>
        event ResetCommandHandler ResetCommandReceived;

        /// <summary>
        /// Sends the academy parameters through the Communicator.
        /// Is used by the academy to send the AcademyParameters to the communicator.
        /// </summary>
        /// <param name="initParameters">The Unity Initialization Parameters to be sent.</param>
        /// <param name="initParametersOut">The External Initialization Parameters received.</param>
        /// <returns>Whether the connection was successful.</returns>
        bool Initialize(CommunicatorInitParameters initParameters, out UnityRLInitParameters initParametersOut);

        /// <summary>
        /// Registers a new Brain to the Communicator.
        /// </summary>
        /// <param name="name">The name or key uniquely identifying the Brain.</param>
        /// <param name="actionSpec">Description of the actions for the Agent.</param>
        void SubscribeBrain(string name, ActionSpec actionSpec);

        /// <summary>
        /// Sends the observations of one Agent.
        /// </summary>
        /// <param name="brainKey">Batch key.</param>
        /// <param name="info">Agent info.</param>
        /// <param name="sensors">The list of ISensors of the Agent.</param>
        void PutObservations(string brainKey, AgentInfo info, List<ISensor> sensors);

        /// <summary>
        /// Signals the ICommunicator that the Agents are now ready to receive their action
        /// and that if the communicator has not yet received an action for one of the Agents
        /// it needs to get one at this point.
        /// </summary>
        void DecideBatch();

        /// <summary>
        /// Gets the AgentActions based on the batching key.
        /// </summary>
        /// <param name="key">A key to identify which behavior actions to get.</param>
        /// <param name="agentId">A key to identify which Agent actions to get.</param>
        /// <returns>The <see cref="ActionBuffers"/> for the given behavior key and agent id.</returns>
        ActionBuffers GetActions(string key, int agentId);
    }
}