using System; | |
using UnityEngine; | |
using System.Collections.Generic; | |
using UnityEditor; | |
using Unity.MLAgents.Actuators; | |
using Unity.MLAgents.Inference; | |
using Unity.MLAgents.Policies; | |
using Unity.MLAgents.SideChannels; | |
using Unity.Barracuda; | |
/** | |
* Welcome to Unity Machine Learning Agents (ML-Agents). | |
* | |
* The ML-Agents toolkit contains four entities: Academy, Agent, Communicator and | |
* Python API. The academy and connected agents live within | |
* a learning environment (herein called Environment), while the communicator | |
* manages the communication between the learning environment and the Python | |
* API. For more information on each of these entities, in addition to how to | |
* set-up a learning environment and train the behavior of characters in a | |
* Unity scene, please browse our documentation pages on GitHub: | |
* https://github.com/Unity-Technologies/ml-agents/tree/release_20_docs/docs/ | |
*/ | |
namespace Unity.MLAgents | |
{ | |
/// <summary>
/// Helper component that steps the <see cref="Academy"/> during the FixedUpdate phase.
/// </summary>
internal class AcademyFixedUpdateStepper : MonoBehaviour
{
    void FixedUpdate()
    {
        // Only the stepper that belongs to the current Academy is allowed to step it.
        var ownedByCurrentAcademy = Academy.IsInitialized && Academy.Instance.IsStepperOwner(this);
        if (ownedByCurrentAcademy)
        {
            Academy.Instance.EnvironmentStep();
            return;
        }

        // Any other stepper is a leftover from a previous run, so remove it
        // to avoid leaking stepper objects.
        Destroy(gameObject);
    }
}
/// <summary> | |
/// The Academy singleton manages agent training and decision making. | |
/// </summary> | |
/// <remarks> | |
/// Access the Academy singleton through the <see cref="Instance"/> | |
/// property. The Academy instance is initialized the first time it is accessed (which will | |
/// typically be by the first <see cref="Agent"/> initialized in a scene). | |
/// | |
/// At initialization, the Academy attempts to connect to the Python training process through | |
/// the external communicator. If successful, the training process can train <see cref="Agent"/> | |
/// instances. When you set an agent's <see cref="BehaviorParameters.BehaviorType"/> setting | |
/// to <see cref="BehaviorType.Default"/>, the agent exchanges data with the training process | |
/// to make decisions. If no training process is available, agents with the default behavior | |
/// fall back to inference or heuristic decisions. (You can also set agents to always use | |
/// inference or heuristics.) | |
/// </remarks> | |
[ | |
] | |
public class Academy : IDisposable | |
{ | |
/// <summary>
/// Communication protocol version.
/// When connecting to python, this must be compatible with UnityEnvironment.API_VERSION.
/// We follow semantic versioning on the communication version, so existing
/// functionality will work as long as the major versions match.
/// This should be changed whenever a change is made to the communication protocol.
/// </summary>
/// <remarks>
/// History:
/// <list type="bullet">
/// <item>
/// <term>1.0.0</term>
/// <description>Initial version</description>
/// </item>
/// <item>
/// <term>1.1.0</term>
/// <description>Support concatenated PNGs for compressed observations.</description>
/// </item>
/// <item>
/// <term>1.2.0</term>
/// <description>Support compression mapping for stacked compressed observations.</description>
/// </item>
/// <item>
/// <term>1.3.0</term>
/// <description>Support both continuous and discrete actions.</description>
/// </item>
/// <item>
/// <term>1.4.0</term>
/// <description>Support training analytics sent from python trainer to the editor.</description>
/// </item>
/// <item>
/// <term>1.5.0</term>
/// <description>Support variable length observation training and multi-agent groups.</description>
/// </item>
/// </list>
/// </remarks>
private const string k_ApiVersion = "1.5.0";

/// <summary>
/// Unity package version of com.unity.ml-agents.
/// This must match the version string in package.json and is checked in a unit test.
/// </summary>
internal const string k_PackageVersion = "2.3.0-exp.4";

// Editor training port constant (not referenced anywhere else in this class).
private const int k_EditorTrainingPort = 5004;

// Command-line flag whose following argument is read as the trainer port.
private const string k_PortCommandLineFlag = "--mlagents-port";

// Lazy initializer pattern, see https://csharpindepth.com/articles/singleton#lazy
// Dispose() swaps in a fresh Lazy so that the next access builds a new Academy.
private static Lazy<Academy> s_Lazy = new Lazy<Academy>(() => new Academy());
/// <summary>
/// Reports whether the Academy has been initialized yet.
/// </summary>
/// <value><c>True</c> if the Academy is initialized, <c>false</c> otherwise.</value>
public static bool IsInitialized => s_Lazy.IsValueCreated;
/// <summary>
/// The singleton Academy object.
/// </summary>
/// <value>Getting the instance initializes the Academy, if necessary.</value>
public static Academy Instance => s_Lazy.Value;
// Fields not provided in the Inspector. | |
/// <summary>
/// Reports whether or not the communicator is on.
/// </summary>
/// <seealso cref="ICommunicator"/>
/// <value>
/// <c>True</c>, if communicator is on (that is, a communicator object is currently held),
/// <c>false</c> otherwise.
/// </value>
public bool IsCommunicatorOn => Communicator != null;
/// <summary>
/// The number of episodes completed by the environment. Incremented
/// each time the environment is reset.
/// </summary>
private int m_EpisodeCount;

/// <summary>
/// The number of steps completed within the current episode. Incremented
/// each time a step is taken in the environment. Is reset to 0 during
/// <see cref="EnvironmentReset"/>.
/// </summary>
private int m_StepCount;

/// <summary>
/// The total number of steps completed during the whole simulation. Incremented
/// each time a step is taken in the environment.
/// </summary>
private int m_TotalStepCount;

/// <summary>
/// The communicator currently in use by the Academy, or null when none is in use.
/// </summary>
internal ICommunicator Communicator;

// Set once InitializeEnvironment() has run; cleared again by Dispose().
private bool m_Initialized;

// ModelRunners handed out by GetOrCreateModelRunner(); disposed and nulled in Dispose().
private List<ModelRunner> m_ModelRunners = new List<ModelRunner>();

// Flag used to keep track of the first time the Academy is reset.
private bool m_HadFirstReset;

// Detect an Academy step called by user code that is also called by the Academy.
private RecursionChecker m_StepRecursionChecker = new RecursionChecker("EnvironmentStep");

// Random seed used for inference.
private int m_InferenceSeed;
/// <summary>
/// Set the random seed used for inference. This should be set before any Agents are added
/// to the scene. The seed is passed to the ModelRunner constructor, and incremented each
/// time a new ModelRunner is created.
/// </summary>
/// <value>Write-only: the seed to hand to the next ModelRunner that gets created.</value>
public int InferenceSeed
{
    set => m_InferenceSeed = value;
}
// Backing field for NumAreas; assigned from the trainer's init parameters on a successful connection.
private int m_NumAreas;

/// <summary>
/// Number of training areas to instantiate.
/// </summary>
public int NumAreas
{
    get { return m_NumAreas; }
}

/// <summary>
/// Returns the RLCapabilities of the python client that the unity process is connected to.
/// </summary>
internal UnityRLCapabilities TrainerCapabilities { get; set; }
// The Academy uses a series of events to communicate with agents
// to facilitate synchronization. More specifically, it ensures
// that all the agents perform their steps in a consistent order (i.e. no
// agent can act based on a decision before another agent has had a chance
// to request a decision).

/// <summary>
/// Signals to all the Agents at each environment step so they can use
/// their Policy to decide on their next action.
/// </summary>
internal event Action DecideAction;

/// <summary>
/// Signals to all the listeners that the academy is being destroyed.
/// </summary>
internal event Action DestroyAction;

/// <summary>
/// Signals to the Agent that a new step is about to start.
/// This will mark the Agent as Done if it has reached its maxSteps.
/// </summary>
internal event Action AgentIncrementStep;

/// <summary>
/// Signals to all of the <see cref="Agent"/>s that their step is about to begin.
/// This is a good time for an <see cref="Agent"/> to decide if it would like to
/// call <see cref="Agent.RequestDecision"/> or <see cref="Agent.RequestAction"/>
/// for this step. Any other pre-step setup could be done during this event as well.
/// </summary>
/// <remarks>
/// The argument is the episode step count before it is incremented for the new step.
/// </remarks>
public event Action<int> AgentPreStep;

/// <summary>
/// Signals to all the agents at each environment step so they can send
/// their state to their Policy if they have requested a decision.
/// </summary>
internal event Action AgentSendState;

/// <summary>
/// Signals to all the agents at each environment step so they can act if
/// they have requested a decision.
/// </summary>
internal event Action AgentAct;

/// <summary>
/// Signals to all the agents each time the Academy force resets.
/// </summary>
internal event Action AgentForceReset;

/// <summary>
/// Signals that the Academy has been reset by the training process.
/// </summary>
public event Action OnEnvironmentReset;

// Component that steps the Academy from FixedUpdate; null while automatic stepping is off.
private AcademyFixedUpdateStepper m_FixedUpdateStepper;

// GameObject that hosts m_FixedUpdateStepper.
private GameObject m_StepperObject;
/// <summary>
/// Constructor run by the lazy singleton factory the first time the Academy is used.
/// It hooks disposal to application shutdown, makes sure a communicator factory is
/// registered, initializes the environment and then starts listening for play mode changes.
/// </summary>
protected Academy()
{
    // Tear the Academy down when the application quits.
    Application.quitting += Dispose;

    // Fall back to the RPC communicator when nothing has been registered yet.
    var communicatorMissing = !CommunicatorFactory.CommunicatorRegistered;
    if (communicatorMissing)
    {
        Debug.Log("Registered Communicator in Academy.");
        CommunicatorFactory.Register<ICommunicator>(RpcCommunicator.Create);
    }

    LazyInitialize();

    // HandleOnPlayModeChanged disposes the Academy when edit mode is being left.
    EditorApplication.playModeStateChanged += HandleOnPlayModeChanged;
}
/// <summary>
/// Clean up the Academy when switching from edit mode to play mode.
/// </summary>
/// <param name="state">The play mode transition reported by the editor.</param>
void HandleOnPlayModeChanged(PlayModeStateChange state)
{
    // Every transition other than leaving edit mode is ignored.
    if (state != PlayModeStateChange.ExitingEditMode)
    {
        return;
    }

    Dispose();
}
/// <summary>
/// Initialize the Academy if it hasn't already been initialized.
/// This method is always safe to call; it will have no effect if the Academy is already
/// initialized.
/// </summary>
internal void LazyInitialize()
{
    if (m_Initialized)
    {
        return;
    }

    InitializeEnvironment();
    // Only flagged after InitializeEnvironment() returns normally.
    m_Initialized = true;
}
/// <summary>
/// Enable stepping of the Academy during the FixedUpdate phase. This is done by creating
/// a temporary GameObject with a MonoBehaviour that calls Academy.EnvironmentStep().
/// Does nothing when a stepper already exists.
/// </summary>
void EnableAutomaticStepping()
{
    var alreadyStepping = m_FixedUpdateStepper != null;
    if (alreadyStepping)
    {
        return;
    }

    var host = new GameObject("AcademyFixedUpdateStepper");
    m_StepperObject = host;

    // Keep the helper object out of the hierarchy view.
    host.hideFlags = HideFlags.HideInHierarchy;
    m_FixedUpdateStepper = host.AddComponent<AcademyFixedUpdateStepper>();

    try
    {
        // DontDestroyOnLoad cannot be used in Editor Tests, so a failure here
        // is deliberately ignored.
        GameObject.DontDestroyOnLoad(host);
    }
    catch { }
}
/// <summary>
/// Disable stepping of the Academy during the FixedUpdate phase. If this is called, the Academy must be
/// stepped manually by the user by calling Academy.EnvironmentStep().
/// Does nothing when there is no stepper.
/// </summary>
void DisableAutomaticStepping()
{
    if (m_FixedUpdateStepper != null)
    {
        m_FixedUpdateStepper = null;

        // In the editor the host object is destroyed immediately; elsewhere the
        // regular Destroy call is used.
        if (!Application.isEditor)
        {
            UnityEngine.Object.Destroy(m_StepperObject);
        }
        else
        {
            UnityEngine.Object.DestroyImmediate(m_StepperObject);
        }

        m_StepperObject = null;
    }
}
/// <summary>
/// Determines whether or not the Academy is automatically stepped during the FixedUpdate phase.
/// </summary>
/// <value>Set <c>true</c> to enable automatic stepping; <c>false</c> to disable.</value>
public bool AutomaticSteppingEnabled
{
    // Automatic stepping is on exactly while a stepper component exists.
    get => m_FixedUpdateStepper != null;
    set
    {
        if (!value)
        {
            DisableAutomaticStepping();
            return;
        }

        EnableAutomaticStepping();
    }
}
/// <summary>
/// Works out which port should be used to reach the trainer.
/// </summary>
/// <remarks>
/// The process command line is scanned for the <c>--mlagents-port</c> flag and the argument
/// that follows it is parsed as the port. When the flag occurs more than once, the last
/// occurrence that is followed by a value wins.
/// </remarks>
/// <returns>
/// The port parsed from the command line. When no port was passed, or it is malformed,
/// the <c>EditorPort</c> setting if <c>ConnectTrainer</c> is set, and <c>-1</c> otherwise.
/// </returns>
static int ReadPortFromArgs()
{
    var args = Environment.GetCommandLineArgs();
    var inputPort = "";

    // Stop one short of the end: the flag only means something when a value follows it.
    // Reading args[i + 1] for a trailing flag would run past the array and throw.
    for (var i = 0; i < args.Length - 1; i++)
    {
        if (args[i] == k_PortCommandLineFlag)
        {
            inputPort = args[i + 1];
        }
    }

    if (int.TryParse(inputPort, out var parsedPort))
    {
        return parsedPort;
    }

    // No arg passed, or malformed port number.
    // Try connecting on the default editor port.
    // NOTE(review): this block used to end with a second, unreachable `return -1;`
    // commented "This is an executable, so we don't try to connect." Presumably the two
    // returns were once selected by an editor/player compile-time switch; confirm.
    return MLAgentsSettingsManager.Settings.ConnectTrainer ? MLAgentsSettingsManager.Settings.EditorPort : -1;
}
// Both are created in InitializeEnvironment() and disposed in Dispose().
private EnvironmentParameters m_EnvironmentParameters;
private StatsRecorder m_StatsRecorder;

/// <summary>
/// Returns the <see cref="EnvironmentParameters"/> instance. If training
/// features such as Curriculum Learning or Environment Parameter Randomization are used,
/// then the values of the parameters generated from the training process can be
/// retrieved here.
/// </summary>
/// <value>The Academy's <see cref="EnvironmentParameters"/> object.</value>
public EnvironmentParameters EnvironmentParameters => m_EnvironmentParameters;
/// <summary>
/// Returns the <see cref="StatsRecorder"/> instance. This instance can be used
/// to record any statistics from the Unity environment.
/// </summary>
/// <value>The Academy's <see cref="StatsRecorder"/> object.</value>
public StatsRecorder StatsRecorder => m_StatsRecorder;
/// <summary>
/// Initializes the environment, configures it and initializes the Academy.
/// </summary>
/// <remarks>
/// In order: records version metadata, turns on automatic stepping, registers the built-in
/// side channels, creates the environment parameters and stats recorder, tries to connect
/// to a trainer on the port from <see cref="ReadPortFromArgs"/>, and finally resets the
/// Academy events. When the connection cannot be established, the communicator is dropped
/// and the Academy carries on without it.
/// </remarks>
void InitializeEnvironment()
{
    TimerStack.Instance.AddMetadata("communication_protocol_version", k_ApiVersion);
    TimerStack.Instance.AddMetadata("com.unity.ml-agents_version", k_PackageVersion);

    EnableAutomaticStepping();

    SideChannelManager.RegisterSideChannel(new EngineConfigurationChannel());
    SideChannelManager.RegisterSideChannel(new TrainingAnalyticsSideChannel());
    m_EnvironmentParameters = new EnvironmentParameters();
    m_StatsRecorder = new StatsRecorder();

    // A communicator is only created when a usable port was found.
    var port = ReadPortFromArgs();
    if (port > 0)
    {
        Communicator = CommunicatorFactory.Create();
    }

    if (Communicator == null && CommunicatorFactory.Enabled && port > 0)
    {
        Debug.Log("Communicator failed to start!");
    }

    if (Communicator != null)
    {
        // Exchange the first message with Python. If that fails, no Python process is
        // ready to train the environment and the environment must use inference.
        var initParams = new CommunicatorInitParameters
        {
            port = port,
            unityCommunicationVersion = k_ApiVersion,
            unityPackageVersion = k_PackageVersion,
            name = "AcademySingleton",
            CSharpCapabilities = new UnityRLCapabilities()
        };

        try
        {
            var connected = Communicator.Initialize(initParams, out var trainerParams);
            if (!connected)
            {
                Debug.Log($"Couldn't connect to trainer on port {port} using API version {k_ApiVersion}. Will perform inference instead.");
                Communicator = null;
            }
            else
            {
                UnityEngine.Random.InitState(trainerParams.seed);
                // Inference-only Agents may exist as well, so they get the same seed.
                m_InferenceSeed = trainerParams.seed;
                m_NumAreas = trainerParams.numAreas;
                TrainerCapabilities = trainerParams.TrainerCapabilities;
                TrainerCapabilities.WarnOnPythonMissingBaseRLCapabilities();
            }
        }
        catch (Exception ex)
        {
            Debug.Log($"Unexpected exception when trying to initialize communication: {ex}\nWill perform inference instead.");
            Communicator = null;
        }
    }

    // The communicator may have been dropped above, hence the second check.
    if (Communicator != null)
    {
        Communicator.QuitCommandReceived += OnQuitCommandReceived;
        Communicator.ResetCommandReceived += OnResetCommand;
    }

    // If a communicator is enabled/provided, then we assume we are in
    // training mode. In the absence of a communicator, we assume we are
    // in inference mode.
    ResetActions();
}
/// <summary>
/// Replaces every Academy event with an empty handler, dropping all current subscribers.
/// </summary>
/// <remarks>
/// Called at the end of environment initialization and again from <see cref="Dispose"/>,
/// where it keeps a disposed Academy from holding references to old listeners.
/// </remarks>
void ResetActions()
{
    DecideAction = () => { };
    DestroyAction = () => { };
    AgentPreStep = i => { };
    // AgentIncrementStep has to be cleared along with the others; leaving it out would
    // let a disposed Academy keep the previous subscribers alive.
    AgentIncrementStep = () => { };
    AgentSendState = () => { };
    AgentAct = () => { };
    AgentForceReset = () => { };
    OnEnvironmentReset = () => { };
}
/// <summary>
/// Handler for the communicator's quit command: leaves play mode and quits the application.
/// </summary>
static void OnQuitCommandReceived()
{
    // Leave play mode first, then ask the application to quit.
    EditorApplication.isPlaying = false;
    Application.Quit();
}
/// <summary>
/// Handler for the communicator's reset command: performs a forced full reset.
/// </summary>
void OnResetCommand() => ForcedFullReset();
/// <summary>
/// The current episode count.
/// </summary>
/// <value>
/// Current episode number.
/// </value>
public int EpisodeCount => m_EpisodeCount;
/// <summary>
/// The current step count (within the current episode).
/// </summary>
/// <value>
/// Current step count.
/// </value>
public int StepCount => m_StepCount;
/// <summary>
/// Returns the total step count.
/// </summary>
/// <value>
/// Total step count.
/// </value>
public int TotalStepCount => m_TotalStepCount;
/// <summary>
/// Forces the full reset. The done flags are not affected. This runs on the first
/// environment step if no reset has happened yet, and every time the communicator
/// delivers a reset command.
/// </summary>
void ForcedFullReset()
{
    EnvironmentReset();

    // Tell the agents about the forced reset, if anyone is listening.
    var resetListeners = AgentForceReset;
    if (resetListeners != null)
    {
        resetListeners();
    }

    m_HadFirstReset = true;
}
/// <summary>
/// Performs a single environment update of the Academy and Agent
/// objects within the environment.
/// </summary>
/// <remarks>
/// The events fire in a fixed order: <see cref="AgentPreStep"/>, then the step counters
/// are incremented, then AgentIncrementStep, AgentSendState, DecideAction and AgentAct.
/// </remarks>
public void EnvironmentStep()
{
    using (m_StepRecursionChecker.Start())
    {
        // The very first step triggers the initial reset.
        var needsInitialReset = !m_HadFirstReset;
        if (needsInitialReset)
        {
            ForcedFullReset();
        }

        // Listeners receive the step count before it is incremented.
        AgentPreStep?.Invoke(m_StepCount);

        m_StepCount++;
        m_TotalStepCount++;
        AgentIncrementStep?.Invoke();

        using (TimerStack.Instance.Scoped("AgentSendState"))
        {
            AgentSendState?.Invoke();
        }

        using (TimerStack.Instance.Scoped("DecideAction"))
        {
            DecideAction?.Invoke();
        }

        // If the communicator is not on, we need to clear the SideChannel sending queue
        if (!IsCommunicatorOn)
        {
            SideChannelManager.GetSideChannelMessage();
        }

        using (TimerStack.Instance.Scoped("AgentAct"))
        {
            AgentAct?.Invoke();
        }
    }
}
/// <summary>
/// Resets the environment, including the Academy: counts a new episode, zeroes the
/// per-episode step count and raises <see cref="OnEnvironmentReset"/>.
/// </summary>
void EnvironmentReset()
{
    m_EpisodeCount += 1;
    m_StepCount = 0;

    var listeners = OnEnvironmentReset;
    if (listeners != null)
    {
        listeners();
    }
}
/// <summary>
/// Creates or retrieves an existing ModelRunner that uses the same
/// NNModel and the InferenceDevice as provided.
/// </summary>
/// <remarks>
/// Existing runners are matched on the model and the inference device only;
/// <paramref name="actionSpec"/> and <paramref name="deterministicInference"/> are used
/// solely when a new runner has to be created. Each newly created runner consumes the
/// current inference seed, which is then incremented.
/// </remarks>
/// <param name="model">The NNModel the ModelRunner must use.</param>
/// <param name="actionSpec"> Description of the actions for the Agent.</param>
/// <param name="inferenceDevice">
/// The inference device (CPU or GPU) the ModelRunner will use.
/// </param>
/// <param name="deterministicInference"> Inference only: set to true if the action selection from model should be
/// Deterministic. </param>
/// <returns> The ModelRunner compatible with the input settings.</returns>
internal ModelRunner GetOrCreateModelRunner(
    NNModel model, ActionSpec actionSpec, InferenceDevice inferenceDevice, bool deterministicInference = false)
{
    // Reuse the first runner that already serves this model on this device.
    foreach (var candidate in m_ModelRunners)
    {
        if (candidate.HasModel(model, inferenceDevice))
        {
            return candidate;
        }
    }

    var created = new ModelRunner(model, actionSpec, inferenceDevice, m_InferenceSeed, deterministicInference);
    m_ModelRunners.Add(created);
    m_InferenceSeed++;
    return created;
}
/// <summary>
/// Shut down the Academy.
/// </summary>
/// <remarks>
/// Detaches the Academy from the global events it subscribed to in its constructor, stops
/// automatic stepping, notifies DestroyAction listeners, disposes the communicator, the
/// environment parameters, the stats recorder and all model runners, unregisters all side
/// channels, clears the Academy events, saves the timers and finally resets the singleton
/// so that the next access to <see cref="Instance"/> creates a new Academy.
/// </remarks>
public void Dispose()
{
    // Stop listening to the application-wide events hooked up in the constructor.
    // Otherwise this disposed instance stays referenced by them and would be disposed
    // again on quit or on the next play mode change. That second pass would unregister
    // the side channels of, and reset the singleton holding, whichever Academy is
    // current by then.
    Application.quitting -= Dispose;
    EditorApplication.playModeStateChanged -= HandleOnPlayModeChanged;

    DisableAutomaticStepping();

    // Signal to listeners that the academy is being destroyed now
    DestroyAction?.Invoke();

    Communicator?.Dispose();
    Communicator = null;

    // Null-tolerant so an Academy whose initialization did not complete can still be torn down.
    m_EnvironmentParameters?.Dispose();
    m_StatsRecorder?.Dispose();
    SideChannelManager.UnregisterAllSideChannels(); // unregister custom side channels

    if (m_ModelRunners != null)
    {
        foreach (var mr in m_ModelRunners)
        {
            mr.Dispose();
        }

        m_ModelRunners = null;
    }

    // Clear out the actions so we're not keeping references to any old objects
    ResetActions();

    // TODO - Pass worker ID or some other identifier,
    // so that multiple envs won't overwrite each others stats.
    TimerStack.Instance.SaveJsonTimers();
    m_Initialized = false;

    // Reset the Lazy instance
    s_Lazy = new Lazy<Academy>(() => new Academy());
}
/// <summary>
/// Check if the input AcademyFixedUpdateStepper belongs to this Academy.
/// </summary>
/// <remarks>
/// The comparison is made against the stepper object of the current singleton
/// (<see cref="Instance"/>), by reference.
/// </remarks>
/// <param name="stepper">The stepper component to check.</param>
/// <returns>
/// <c>true</c> if the stepper's GameObject is the singleton's stepper object.
/// </returns>
internal bool IsStepperOwner(AcademyFixedUpdateStepper stepper)
{
    var candidateObject = stepper.gameObject;
    var currentStepperObject = Academy.Instance.m_StepperObject;
    return object.ReferenceEquals(candidateObject, currentStepperObject);
}
} | |
} | |