File size: 22,901 Bytes
e11e4fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
import atexit
from distutils.version import StrictVersion

import numpy as np
import os
import subprocess
from typing import Dict, List, Optional, Tuple, Mapping as MappingType

import mlagents_envs

from mlagents_envs.logging_util import get_logger
from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel import DefaultTrainingAnalyticsSideChannel
from mlagents_envs.side_channel.side_channel_manager import SideChannelManager
from mlagents_envs import env_utils

from mlagents_envs.base_env import (
    BaseEnv,
    DecisionSteps,
    TerminalSteps,
    BehaviorSpec,
    ActionTuple,
    BehaviorName,
    AgentId,
    BehaviorMapping,
)
from mlagents_envs.timers import timed, hierarchical_timer
from mlagents_envs.exception import (
    UnityEnvironmentException,
    UnityActionException,
    UnityTimeOutException,
    UnityCommunicatorStoppedException,
)

from mlagents_envs.communicator_objects.command_pb2 import STEP, RESET
from mlagents_envs.rpc_utils import behavior_spec_from_proto, steps_from_proto

from mlagents_envs.communicator_objects.unity_rl_input_pb2 import UnityRLInputProto
from mlagents_envs.communicator_objects.unity_rl_output_pb2 import UnityRLOutputProto
from mlagents_envs.communicator_objects.agent_action_pb2 import AgentActionProto
from mlagents_envs.communicator_objects.unity_output_pb2 import UnityOutputProto
from mlagents_envs.communicator_objects.capabilities_pb2 import UnityRLCapabilitiesProto
from mlagents_envs.communicator_objects.unity_rl_initialization_input_pb2 import (
    UnityRLInitializationInputProto,
)

from mlagents_envs.communicator_objects.unity_input_pb2 import UnityInputProto

from .rpc_communicator import RpcCommunicator
import signal

logger = get_logger(__name__)


class UnityEnvironment(BaseEnv):
    # Communication protocol version.
    # When connecting to C#, this must be compatible with Academy.k_ApiVersion.
    # We follow semantic versioning on the communication version, so existing
    # functionality will work as long as the major versions match.
    # This should be changed whenever a change is made to the communication protocol.
    # Revision history:
    #  * 1.0.0 - initial version
    #  * 1.1.0 - support concatenated PNGs for compressed observations.
    #  * 1.2.0 - support compression mapping for stacked compressed observations.
    #  * 1.3.0 - support action spaces with both continuous and discrete actions.
    #  * 1.4.0 - support training analytics sent from python trainer to the editor.
    #  * 1.5.0 - support variable length observation training and multi-agent groups.
    API_VERSION = "1.5.0"

    # Default port that the editor listens on. If an environment executable
    # isn't specified, this port will be used.
    DEFAULT_EDITOR_PORT = 5004

    # Default base port for environments. Each environment will be offset from this
    # by its worker_id.
    BASE_ENVIRONMENT_PORT = 5005

    # Command line argument used to pass the port to the executable environment.
    _PORT_COMMAND_LINE_ARG = "--mlagents-port"

    @staticmethod
    def _raise_version_exception(unity_com_ver: str) -> None:
        """
        Raise the error used when Unity and python speak incompatible API versions.

        :str unity_com_ver: Communication API version reported by the Unity side.
        :raises UnityEnvironmentException: Always. The message lists both versions
            and points to the release page.
        """
        python_com_ver = UnityEnvironment.API_VERSION
        message = (
            "The communication API version is not compatible between Unity and python. "
            + f"Python API: {python_com_ver}, Unity API: {unity_com_ver}.\n "
            + "Please find the versions that work best together from our release page.\n"
            + "https://github.com/Unity-Technologies/ml-agents/releases"
        )
        raise UnityEnvironmentException(message)

    @staticmethod
    def _check_communication_compatibility(
        unity_com_ver: str, python_api_version: str, unity_package_version: str
    ) -> bool:
        """
        Decide whether the Unity and python communication versions can talk.

        :str unity_com_ver: Communication version reported by Unity.
        :str python_api_version: Communication version of this python package.
        :str unity_package_version: Unity package version, only used for logging.
        :return: True when the versions are compatible, False otherwise.
        """
        unity_parts = StrictVersion(unity_com_ver).version
        python_parts = StrictVersion(python_api_version).version

        if unity_parts[0] == 0:
            # Beta (0.x) versions: both the major and the minor number must agree.
            return unity_parts[:2] == python_parts[:2]

        if unity_parts[0] != python_parts[0]:
            # Major versions mismatch.
            return False

        # Major versions match. Whether the versions are identical, Unity is newer
        # (it warns or fails on its side for unsupported features) or the trainer
        # is newer (extra trainer features simply go unused by C#), there is no
        # reason to warn about a mismatch here.
        logger.info(
            f"Connected to Unity environment with package version {unity_package_version} "
            f"and communication version {unity_com_ver}"
        )
        return True

    @staticmethod
    def _get_capabilities_proto() -> UnityRLCapabilitiesProto:
        """
        Build the capabilities message advertised by this python package.

        :return: A UnityRLCapabilitiesProto with every capability flag listed
            below set to True.
        """
        enabled_flags = (
            "baseRLCapabilities",
            "concatenatedPngObservations",
            "compressedChannelMapping",
            "hybridActions",
            "trainingAnalytics",
            "variableLengthObservation",
            "multiAgentGroups",
        )
        capabilities = UnityRLCapabilitiesProto()
        for flag_name in enabled_flags:
            setattr(capabilities, flag_name, True)
        return capabilities

    @staticmethod
    def _warn_csharp_base_capabilities(
        caps: UnityRLCapabilitiesProto, unity_package_ver: str, python_package_ver: str
    ) -> None:
        """
        Log a warning when Unity does not report the base RL capabilities.

        :UnityRLCapabilitiesProto caps: Capabilities reported by the Unity side.
        :str unity_package_ver: Version of the C# package, shown in the warning.
        :str python_package_ver: Version of the python package, shown in the warning.
        """
        if caps.baseRLCapabilities:
            return
        logger.warning(
            "WARNING: The Unity process is not running with the expected base Reinforcement Learning"
            " capabilities. Please be sure to upgrade the Unity Package to a version that is compatible with this "
            "python package.\n"
            # Terminate the version line so it does not run into the next sentence.
            f"Python package version: {python_package_ver}, C# package version: {unity_package_ver}.\n"
            "Please find the versions that work best together from our release page.\n"
            "https://github.com/Unity-Technologies/ml-agents/releases"
        )

    def __init__(
        self,
        file_name: Optional[str] = None,
        worker_id: int = 0,
        base_port: Optional[int] = None,
        seed: int = 0,
        no_graphics: bool = False,
        timeout_wait: int = 60,
        additional_args: Optional[List[str]] = None,
        side_channels: Optional[List[SideChannel]] = None,
        log_folder: Optional[str] = None,
        num_areas: int = 1,
    ):
        """
        Starts a new unity environment and establishes a connection with the environment.
        Notice: Currently communication between Unity and Python takes place over an open socket without authentication.
        Ensure that the network where training takes place is secure.

        :string file_name: Name of Unity environment binary. If None, no executable is launched and the
            communicator waits for an existing Unity environment (the Editor) to connect.
        :int worker_id: Offset from base_port. Used for training multiple environments simultaneously.
            Must be 0 when file_name is None.
        :int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
            If no environment is specified (i.e. file_name is None), the DEFAULT_EDITOR_PORT will be used.
        :int seed: Seed sent to Unity in the initialization message.
        :bool no_graphics: Whether to run the Unity simulator in no-graphics mode
        :int timeout_wait: Time (in seconds) to wait for connection from environment.
        :list additional_args: Additional Unity command line arguments
        :list side_channels: Additional side channels for no-rl communication with Unity. The list is
            copied, so the caller's list is never modified.
        :str log_folder: Optional folder to write the Unity Player log file into.  Requires absolute path.
        :int num_areas: Number of areas, sent to Unity in the initialization message.
        :raises UnityEnvironmentException: If file_name is None while worker_id is not 0, if the executable
            cannot be launched, or if the communication versions are incompatible.
        :raises UnityTimeOutException: Re-raised (after closing) when sending the initialization
            parameters times out.
        """
        # Reject an invalid configuration before a port is opened or an exit hook
        # is registered: only worker 0 can connect to the Editor.
        if file_name is None and worker_id != 0:
            raise UnityEnvironmentException(
                "If the environment name is None, "
                "the worker-id must be 0 in order to connect with the Editor."
            )

        self._additional_args = additional_args or []
        self._no_graphics = no_graphics
        # If base port is not specified, use BASE_ENVIRONMENT_PORT if we have
        # an environment, otherwise DEFAULT_EDITOR_PORT
        if base_port is None:
            base_port = (
                self.BASE_ENVIRONMENT_PORT if file_name else self.DEFAULT_EDITOR_PORT
            )
        self._port = base_port + worker_id
        self._buffer_size = 12000
        # If true, this means the environment was successfully loaded
        self._loaded = False
        # The process that is started. If None, no process was started
        self._process: Optional[subprocess.Popen] = None
        self._timeout_wait: int = timeout_wait
        self._communicator = self._get_communicator(worker_id, base_port, timeout_wait)
        # Register the exit hook only once every attribute _close() reads exists,
        # so a failed communicator creation cannot trigger an AttributeError at exit.
        atexit.register(self._close)
        self._worker_id = worker_id

        # Work on a private copy: appending the default analytics channel to the
        # caller's list would leak it into any other environment built from that list.
        side_channels = [] if side_channels is None else list(side_channels)
        default_training_side_channel: Optional[
            DefaultTrainingAnalyticsSideChannel
        ] = None
        if DefaultTrainingAnalyticsSideChannel.CHANNEL_ID not in [
            _.channel_id for _ in side_channels
        ]:
            default_training_side_channel = DefaultTrainingAnalyticsSideChannel()
            side_channels.append(default_training_side_channel)
        self._side_channel_manager = SideChannelManager(side_channels)
        self._log_folder = log_folder
        self.academy_capabilities: UnityRLCapabilitiesProto = None  # type: ignore

        # If the environment name is None, a new environment will not be launched
        # and the communicator will directly try to connect to an existing unity environment.
        if file_name is not None:
            try:
                self._process = env_utils.launch_executable(
                    file_name, self._executable_args()
                )
            except UnityEnvironmentException:
                self._close(0)
                raise
        else:
            logger.info(
                f"Listening on port {self._port}. "
                f"Start training by pressing the Play button in the Unity Editor."
            )
        self._loaded = True

        rl_init_parameters_in = UnityRLInitializationInputProto(
            seed=seed,
            communication_version=self.API_VERSION,
            package_version=mlagents_envs.__version__,
            capabilities=UnityEnvironment._get_capabilities_proto(),
            num_areas=num_areas,
        )
        try:
            aca_output = self._send_academy_parameters(rl_init_parameters_in)
            aca_params = aca_output.rl_initialization_output
        except UnityTimeOutException:
            self._close(0)
            raise

        if not UnityEnvironment._check_communication_compatibility(
            aca_params.communication_version,
            UnityEnvironment.API_VERSION,
            aca_params.package_version,
        ):
            self._close(0)
            UnityEnvironment._raise_version_exception(aca_params.communication_version)

        # The warning reports *package* versions, so pass the python package
        # version rather than the communication API version.
        UnityEnvironment._warn_csharp_base_capabilities(
            aca_params.capabilities,
            aca_params.package_version,
            mlagents_envs.__version__,
        )

        self._env_state: Dict[str, Tuple[DecisionSteps, TerminalSteps]] = {}
        self._env_specs: Dict[str, BehaviorSpec] = {}
        self._env_actions: Dict[str, ActionTuple] = {}
        self._is_first_message = True
        self._update_behavior_specs(aca_output)
        self.academy_capabilities = aca_params.capabilities
        if default_training_side_channel is not None:
            default_training_side_channel.environment_initialized()

    @staticmethod
    def _get_communicator(worker_id, base_port, timeout_wait):
        """
        Create the communicator used to talk to Unity.

        :return: An RpcCommunicator built from worker_id, base_port and timeout_wait.
        """
        communicator = RpcCommunicator(worker_id, base_port, timeout_wait)
        return communicator

    def _executable_args(self) -> List[str]:
        args: List[str] = []
        if self._no_graphics:
            args += ["-nographics", "-batchmode"]
        args += [UnityEnvironment._PORT_COMMAND_LINE_ARG, str(self._port)]

        # If the logfile arg isn't already set in the env args,
        # try to set it to an output directory
        logfile_set = "-logfile" in (arg.lower() for arg in self._additional_args)
        if self._log_folder and not logfile_set:
            log_file_path = os.path.join(
                self._log_folder, f"Player-{self._worker_id}.log"
            )
            args += ["-logFile", log_file_path]
        # Add in arguments passed explicitly by the user.
        args += self._additional_args
        return args

    def _update_behavior_specs(self, output: UnityOutputProto) -> None:
        """
        Register a BehaviorSpec for every brain announced in the output.

        A brain is only registered when the output also carries at least one
        agent info for it, because the spec is built from that agent's data.
        """
        for params in output.rl_initialization_output.brain_parameters:
            name = params.brain_name
            infos = output.rl_output.agentInfos[name].value
            if not infos:
                continue
            # The first agent is enough; we only need some of its observations.
            self._env_specs[name] = behavior_spec_from_proto(params, infos[0])
            logger.info(f"Connected new brain: {name}")

    def _update_state(self, output: UnityRLOutputProto) -> None:
        """
        Refresh the stored steps of every known behavior from the latest output,
        then hand the side channel payload to the side channel manager.
        """
        for name, spec in self._env_specs.items():
            if name in output.agentInfos:
                steps = steps_from_proto(output.agentInfos[name].value, spec)
            else:
                # Nothing reported for this behavior: store empty steps.
                steps = (DecisionSteps.empty(spec), TerminalSteps.empty(spec))
            self._env_state[name] = steps
        self._side_channel_manager.process_side_channel_message(output.side_channel)

    def reset(self) -> None:
        """
        Send a reset command to the environment and store the resulting state.

        :raises UnityEnvironmentException: If no environment is loaded.
        :raises UnityCommunicatorStoppedException: If the communicator returns nothing.
        """
        if not self._loaded:
            raise UnityEnvironmentException("No Unity environment is loaded.")

        response = self._communicator.exchange(
            self._generate_reset_input(), self._poll_process
        )
        if response is None:
            raise UnityCommunicatorStoppedException("Communicator has exited.")

        self._update_behavior_specs(response)
        self._update_state(response.rl_output)
        self._is_first_message = False
        # Actions queued before the reset no longer apply.
        self._env_actions.clear()

    @timed
    def step(self) -> None:
        """
        Send the queued actions to the environment and store the resulting state.

        The very first call is turned into a reset. Behaviors without a queued
        action get an empty action sized to their current number of agents.

        :raises UnityEnvironmentException: If no environment is loaded.
        :raises UnityCommunicatorStoppedException: If the communicator returns nothing.
        """
        if self._is_first_message:
            return self.reset()
        if not self._loaded:
            raise UnityEnvironmentException("No Unity environment is loaded.")

        # Fill the blanks for behaviors that received no action.
        for name, spec in self._env_specs.items():
            if name in self._env_actions:
                continue
            agent_count = (
                len(self._env_state[name][0]) if name in self._env_state else 0
            )
            self._env_actions[name] = spec.action_spec.empty_action(agent_count)

        request = self._generate_step_input(self._env_actions)
        with hierarchical_timer("communicator.exchange"):
            response = self._communicator.exchange(request, self._poll_process)
        if response is None:
            raise UnityCommunicatorStoppedException("Communicator has exited.")

        self._update_behavior_specs(response)
        self._update_state(response.rl_output)
        self._env_actions.clear()

    @property
    def behavior_specs(self) -> MappingType[str, BehaviorSpec]:
        """Return the known behavior specs wrapped in a BehaviorMapping."""
        specs_view = BehaviorMapping(self._env_specs)
        return specs_view

    def _assert_behavior_exists(self, behavior_name: str) -> None:
        """
        Ensure behavior_name refers to a registered behavior.

        :raises UnityActionException: If no spec is stored under behavior_name.
        """
        if behavior_name in self._env_specs:
            return
        message = (
            f"The group {behavior_name} does not correspond to an existing "
            "agent group in the environment"
        )
        raise UnityActionException(message)

    def set_actions(self, behavior_name: BehaviorName, action: ActionTuple) -> None:
        """
        Queue the action for all agents of a behavior for the next step.

        Nothing is queued when no state is stored for the behavior yet.

        :raises UnityActionException: If behavior_name is not a known behavior.
        """
        self._assert_behavior_exists(behavior_name)
        if behavior_name in self._env_state:
            spec = self._env_specs[behavior_name].action_spec
            decision_steps = self._env_state[behavior_name][0]
            # The action must cover every agent that requested a decision.
            self._env_actions[behavior_name] = spec._validate_action(
                action, len(decision_steps), behavior_name
            )

    def set_action_for_agent(
        self, behavior_name: BehaviorName, agent_id: AgentId, action: ActionTuple
    ) -> None:
        """
        Queue the action of a single agent for the next step.

        Nothing is queued when no state is stored for the behavior yet.

        :BehaviorName behavior_name: Behavior the agent belongs to.
        :AgentId agent_id: Id of the agent, as found in the current decision steps.
        :ActionTuple action: Action for that one agent (validated for 1 agent).
        :raises UnityActionException: If behavior_name is not a known behavior.
        :raises IndexError: If agent_id is not among the agents of the current
            decision steps.
        """
        self._assert_behavior_exists(behavior_name)
        if behavior_name not in self._env_state:
            return
        action_spec = self._env_specs[behavior_name].action_spec
        action = action_spec._validate_action(action, 1, behavior_name)
        decision_steps = self._env_state[behavior_name][0]

        # Locate the agent's row first, so an unknown agent_id leaves the queued
        # actions untouched instead of creating an empty entry as a side effect.
        try:
            index = np.where(decision_steps.agent_id == agent_id)[0][0]
        except IndexError as ie:
            raise IndexError(
                f"agent_id {agent_id} did not request a decision at the previous step"
            ) from ie

        if behavior_name not in self._env_actions:
            self._env_actions[behavior_name] = action_spec.empty_action(
                len(decision_steps)
            )
        if action_spec.continuous_size > 0:
            self._env_actions[behavior_name].continuous[index] = action.continuous[0, :]
        if action_spec.discrete_size > 0:
            self._env_actions[behavior_name].discrete[index] = action.discrete[0, :]

    def get_steps(
        self, behavior_name: BehaviorName
    ) -> Tuple[DecisionSteps, TerminalSteps]:
        """
        Return the stored (DecisionSteps, TerminalSteps) pair of a behavior.

        :raises UnityActionException: If behavior_name is not a known behavior.
        """
        self._assert_behavior_exists(behavior_name)
        current_steps = self._env_state[behavior_name]
        return current_steps

    def _poll_process(self) -> None:
        """
        Check the status of the subprocess. If it has exited, raise a UnityEnvironmentException
        :return: None
        """
        if not self._process:
            return
        poll_res = self._process.poll()
        if poll_res is not None:
            exc_msg = self._returncode_to_env_message(self._process.returncode)
            raise UnityEnvironmentException(exc_msg)

    def close(self):
        """
        Sends a shutdown signal to the unity environment, and closes the socket connection.

        :raises UnityEnvironmentException: If no environment is loaded.
        """
        if not self._loaded:
            raise UnityEnvironmentException("No Unity environment is loaded.")
        self._close()

    def _close(self, timeout: Optional[int] = None) -> None:
        """
        Close the communicator and environment subprocess (if necessary).

        :int timeout: [Optional] Number of seconds to wait for the environment to shut down before
            force-killing it.  Defaults to `self._timeout_wait`.
        """
        if timeout is None:
            timeout = self._timeout_wait
        self._loaded = False
        self._communicator.close()
        if self._process is not None:
            # Wait a bit for the process to shutdown, but kill it if it takes too long
            try:
                self._process.wait(timeout=timeout)
                logger.debug(self._returncode_to_env_message(self._process.returncode))
            except subprocess.TimeoutExpired:
                logger.warning("Environment timed out shutting down. Killing...")
                self._process.kill()
                # Reap the killed child so it does not linger as a zombie process.
                self._process.wait()
            # Set to None so we don't try to close multiple times.
            self._process = None

    @timed
    def _generate_step_input(
        self, vector_action: Dict[str, ActionTuple]
    ) -> UnityInputProto:
        """
        Build the STEP message carrying one action per agent and behavior.

        :dict vector_action: Maps each behavior name to the ActionTuple holding
            the actions of its agents, indexed in the order of the current
            decision steps.
        :return: A UnityInputProto wrapping the step command, the per-agent
            actions and the pending side channel messages.
        """
        rl_in = UnityRLInputProto()
        # Set the command once, up front, so it is explicit even when no behavior
        # has any agent (it used to be assigned inside the per-agent loop).
        rl_in.command = STEP
        for behavior_name, behavior_action in vector_action.items():
            n_agents = len(self._env_state[behavior_name][0])
            continuous = behavior_action.continuous
            discrete = behavior_action.discrete
            for i in range(n_agents):
                action = AgentActionProto()
                if continuous is not None:
                    # The deprecated field is still filled alongside the new one.
                    action.vector_actions_deprecated.extend(continuous[i])
                    action.continuous_actions.extend(continuous[i])
                if discrete is not None:
                    action.vector_actions_deprecated.extend(discrete[i])
                    action.discrete_actions.extend(discrete[i])
                rl_in.agent_actions[behavior_name].value.extend([action])
        rl_in.side_channel = bytes(
            self._side_channel_manager.generate_side_channel_messages()
        )
        return self._wrap_unity_input(rl_in)

    def _generate_reset_input(self) -> UnityInputProto:
        """
        Build the RESET message, including the pending side channel messages.

        :return: A UnityInputProto wrapping the reset command.
        """
        reset_request = UnityRLInputProto()
        reset_request.command = RESET
        pending_messages = self._side_channel_manager.generate_side_channel_messages()
        reset_request.side_channel = bytes(pending_messages)
        return self._wrap_unity_input(reset_request)

    def _send_academy_parameters(
        self, init_parameters: UnityRLInitializationInputProto
    ) -> UnityOutputProto:
        """
        Send the initialization parameters through the communicator.

        :return: Whatever the communicator's initialize call returns.
        """
        request = UnityInputProto()
        request.rl_initialization_input.CopyFrom(init_parameters)
        response = self._communicator.initialize(request, self._poll_process)
        return response

    @staticmethod
    def _wrap_unity_input(rl_input: UnityRLInputProto) -> UnityInputProto:
        """Return a new UnityInputProto whose rl_input is a copy of rl_input."""
        wrapper = UnityInputProto()
        wrapper.rl_input.CopyFrom(rl_input)
        return wrapper

    @staticmethod
    def _returncode_to_signal_name(returncode: int) -> Optional[str]:
        """
        Try to convert return codes into their corresponding signal name.
        E.g. returncode_to_signal_name(-2) -> "SIGINT"
        """
        try:
            # A negative value -N indicates that the child was terminated by signal N (POSIX only).
            s = signal.Signals(-returncode)
            return s.name
        except Exception:
            # Should generally be a ValueError, but catch everything just in case.
            return None

    @staticmethod
    def _returncode_to_env_message(returncode: int) -> str:
        signal_name = UnityEnvironment._returncode_to_signal_name(returncode)
        signal_name = f" ({signal_name})" if signal_name else ""
        return f"Environment shut down with return code {returncode}{signal_name}."