Spaces:
Running
Running
File size: 22,901 Bytes
e11e4fe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 |
import atexit
from distutils.version import StrictVersion
import numpy as np
import os
import subprocess
from typing import Dict, List, Optional, Tuple, Mapping as MappingType
import mlagents_envs
from mlagents_envs.logging_util import get_logger
from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel import DefaultTrainingAnalyticsSideChannel
from mlagents_envs.side_channel.side_channel_manager import SideChannelManager
from mlagents_envs import env_utils
from mlagents_envs.base_env import (
BaseEnv,
DecisionSteps,
TerminalSteps,
BehaviorSpec,
ActionTuple,
BehaviorName,
AgentId,
BehaviorMapping,
)
from mlagents_envs.timers import timed, hierarchical_timer
from mlagents_envs.exception import (
UnityEnvironmentException,
UnityActionException,
UnityTimeOutException,
UnityCommunicatorStoppedException,
)
from mlagents_envs.communicator_objects.command_pb2 import STEP, RESET
from mlagents_envs.rpc_utils import behavior_spec_from_proto, steps_from_proto
from mlagents_envs.communicator_objects.unity_rl_input_pb2 import UnityRLInputProto
from mlagents_envs.communicator_objects.unity_rl_output_pb2 import UnityRLOutputProto
from mlagents_envs.communicator_objects.agent_action_pb2 import AgentActionProto
from mlagents_envs.communicator_objects.unity_output_pb2 import UnityOutputProto
from mlagents_envs.communicator_objects.capabilities_pb2 import UnityRLCapabilitiesProto
from mlagents_envs.communicator_objects.unity_rl_initialization_input_pb2 import (
UnityRLInitializationInputProto,
)
from mlagents_envs.communicator_objects.unity_input_pb2 import UnityInputProto
from .rpc_communicator import RpcCommunicator
import signal
# Module-level logger; used below for connection, warning and shutdown messages.
logger = get_logger(__name__)
# Python-side handle on a Unity environment: it either launches an environment
# executable or attaches to the Editor, and talks to it through a communicator.
class UnityEnvironment(BaseEnv):
# Communication protocol version.
# When connecting to C#, this must be compatible with Academy.k_ApiVersion.
# We follow semantic versioning on the communication version, so existing
# functionality will work as long as the major versions match.
# This should be changed whenever a change is made to the communication protocol.
# Revision history:
# * 1.0.0 - initial version
# * 1.1.0 - support concatenated PNGs for compressed observations.
# * 1.2.0 - support compression mapping for stacked compressed observations.
# * 1.3.0 - support action spaces with both continuous and discrete actions.
# * 1.4.0 - support training analytics sent from python trainer to the editor.
# * 1.5.0 - support variable length observation training and multi-agent groups.
API_VERSION = "1.5.0"
# Default port that the editor listens on. If an environment executable
# isn't specified, this port will be used.
DEFAULT_EDITOR_PORT = 5004
# Default base port for environments. Each environment will be offset from this
# by its worker_id.
BASE_ENVIRONMENT_PORT = 5005
# Command line argument used to pass the port to the executable environment.
_PORT_COMMAND_LINE_ARG = "--mlagents-port"
@staticmethod
def _raise_version_exception(unity_com_ver: str) -> None:
    """
    Abort with an error describing a communication API version mismatch.
    :string unity_com_ver: Communication version reported by the Unity side.
    :raises UnityEnvironmentException: Always; the message quotes both versions
        and points at the release page.
    """
    message = (
        f"The communication API version is not compatible between Unity and python. "
        f"Python API: {UnityEnvironment.API_VERSION}, Unity API: {unity_com_ver}.\n "
        f"Please find the versions that work best together from our release page.\n"
        "https://github.com/Unity-Technologies/ml-agents/releases"
    )
    raise UnityEnvironmentException(message)
@staticmethod
def _check_communication_compatibility(
unity_com_ver: str, python_api_version: str, unity_package_version: str
) -> bool:
unity_communicator_version = StrictVersion(unity_com_ver)
api_version = StrictVersion(python_api_version)
if unity_communicator_version.version[0] == 0:
if (
unity_communicator_version.version[0] != api_version.version[0]
or unity_communicator_version.version[1] != api_version.version[1]
):
# Minor beta versions differ.
return False
elif unity_communicator_version.version[0] != api_version.version[0]:
# Major versions mismatch.
return False
else:
# Major versions match, so either:
# 1) The versions are identical, in which case there's no compatibility issues
# 2) The Unity version is newer, in which case we'll warn or fail on the Unity side if trying to use
# unsupported features
# 3) The trainer version is newer, in which case new trainer features might be available but unused by C#
# In any of the cases, there's no reason to warn about mismatch here.
logger.info(
f"Connected to Unity environment with package version {unity_package_version} "
f"and communication version {unity_com_ver}"
)
return True
@staticmethod
def _get_capabilities_proto() -> UnityRLCapabilitiesProto:
    """
    Build the capabilities message advertised to Unity.
    :return: A UnityRLCapabilitiesProto with every capability flag listed below set to True.
    """
    return UnityRLCapabilitiesProto(
        baseRLCapabilities=True,
        concatenatedPngObservations=True,
        compressedChannelMapping=True,
        hybridActions=True,
        trainingAnalytics=True,
        variableLengthObservation=True,
        multiAgentGroups=True,
    )
@staticmethod
def _warn_csharp_base_capabilities(
    caps: UnityRLCapabilitiesProto, unity_package_ver: str, python_package_ver: str
) -> None:
    """
    Log a warning when the Unity side does not report the base RL capabilities.
    :UnityRLCapabilitiesProto caps: Capabilities reported by the Unity process.
    :string unity_package_ver: Version of the C# package, quoted in the warning.
    :string python_package_ver: Version of the python package, quoted in the warning.
    """
    if caps.baseRLCapabilities:
        return
    # The version line now ends with ".\n"; previously the following sentence
    # was glued straight onto the C# version number.
    logger.warning(
        "WARNING: The Unity process is not running with the expected base Reinforcement Learning"
        " capabilities. Please be sure to upgrade the Unity Package to a version that is compatible with this "
        "python package.\n"
        f"Python package version: {python_package_ver}, C# package version: {unity_package_ver}.\n"
        "Please find the versions that work best together from our release page.\n"
        "https://github.com/Unity-Technologies/ml-agents/releases"
    )
def __init__(
    self,
    file_name: Optional[str] = None,
    worker_id: int = 0,
    base_port: Optional[int] = None,
    seed: int = 0,
    no_graphics: bool = False,
    timeout_wait: int = 60,
    additional_args: Optional[List[str]] = None,
    side_channels: Optional[List[SideChannel]] = None,
    log_folder: Optional[str] = None,
    num_areas: int = 1,
):
    """
    Starts a new unity environment and establishes a connection with the environment.
    Notice: Currently communication between Unity and Python takes place over an open socket without authentication.
    Ensure that the network where training takes place is secure.

    :string file_name: Name of Unity environment binary. If None, no executable is launched and the
        communicator waits for an already running Unity Editor.
    :int worker_id: Offset from base_port. Used for training multiple environments simultaneously.
        Must be 0 when file_name is None.
    :int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
        If no environment is specified (i.e. file_name is None), the DEFAULT_EDITOR_PORT will be used.
    :int seed: Seed sent to Unity in the initialization message.
    :bool no_graphics: Whether to run the Unity simulator in no-graphics mode
    :int timeout_wait: Time (in seconds) to wait for connection from environment.
    :list additional_args: Additional Unity command line arguments
    :list side_channels: Additional side channels for non-RL communication with Unity.
        The list itself is not modified.
    :str log_folder: Optional folder to write the Unity Player log file into. Requires absolute path.
    :int num_areas: Number of areas, sent to Unity in the initialization message.
    :raises UnityEnvironmentException: If file_name is None while worker_id is not 0, if the executable
        cannot be launched, or if the communication versions are incompatible.
    :raises UnityTimeOutException: Re-raised when sending the initialization message times out.
    """
    # If the environment name is None, a new environment will not be launched
    # and the communicator will directly try to connect to an existing unity environment.
    # That only works for worker 0, so reject the combination before any port
    # is opened or any cleanup hook is registered.
    if file_name is None and worker_id != 0:
        raise UnityEnvironmentException(
            "If the environment name is None, "
            "the worker-id must be 0 in order to connect with the Editor."
        )
    self._additional_args = additional_args or []
    self._no_graphics = no_graphics
    # If base port is not specified, use BASE_ENVIRONMENT_PORT if we have
    # an environment, otherwise DEFAULT_EDITOR_PORT
    if base_port is None:
        base_port = (
            self.BASE_ENVIRONMENT_PORT if file_name else self.DEFAULT_EDITOR_PORT
        )
    self._port = base_port + worker_id
    self._buffer_size = 12000
    # If true, this means the environment was successfully loaded
    self._loaded = False
    # The process that is started. If None, no process was started
    self._process: Optional[subprocess.Popen] = None
    self._timeout_wait: int = timeout_wait
    self._communicator = self._get_communicator(worker_id, base_port, timeout_wait)
    # Register the exit hook only now: _close() reads _communicator, _process
    # and _timeout_wait, which all exist from this point on.
    atexit.register(self._close)
    self._worker_id = worker_id
    # Work on a copy so the default analytics channel appended below never
    # leaks into the caller's list (which may be reused for another environment).
    side_channels = list(side_channels) if side_channels is not None else []
    default_training_side_channel: Optional[
        DefaultTrainingAnalyticsSideChannel
    ] = None
    if all(
        channel.channel_id != DefaultTrainingAnalyticsSideChannel.CHANNEL_ID
        for channel in side_channels
    ):
        default_training_side_channel = DefaultTrainingAnalyticsSideChannel()
        side_channels.append(default_training_side_channel)
    self._side_channel_manager = SideChannelManager(side_channels)
    self._log_folder = log_folder
    self.academy_capabilities: UnityRLCapabilitiesProto = None  # type: ignore
    if file_name is not None:
        try:
            self._process = env_utils.launch_executable(
                file_name, self._executable_args()
            )
        except UnityEnvironmentException:
            self._close(0)
            raise
    else:
        logger.info(
            f"Listening on port {self._port}. "
            f"Start training by pressing the Play button in the Unity Editor."
        )
    self._loaded = True
    rl_init_parameters_in = UnityRLInitializationInputProto(
        seed=seed,
        communication_version=self.API_VERSION,
        package_version=mlagents_envs.__version__,
        capabilities=UnityEnvironment._get_capabilities_proto(),
        num_areas=num_areas,
    )
    try:
        aca_output = self._send_academy_parameters(rl_init_parameters_in)
        aca_params = aca_output.rl_initialization_output
    except UnityTimeOutException:
        self._close(0)
        raise
    if not UnityEnvironment._check_communication_compatibility(
        aca_params.communication_version,
        UnityEnvironment.API_VERSION,
        aca_params.package_version,
    ):
        self._close(0)
        UnityEnvironment._raise_version_exception(aca_params.communication_version)
    # The warning prints this value as the "Python package version", so pass
    # the package version rather than the communication API version.
    UnityEnvironment._warn_csharp_base_capabilities(
        aca_params.capabilities,
        aca_params.package_version,
        mlagents_envs.__version__,
    )
    self._env_state: Dict[str, Tuple[DecisionSteps, TerminalSteps]] = {}
    self._env_specs: Dict[str, BehaviorSpec] = {}
    self._env_actions: Dict[str, ActionTuple] = {}
    self._is_first_message = True
    self._update_behavior_specs(aca_output)
    self.academy_capabilities = aca_params.capabilities
    if default_training_side_channel is not None:
        default_training_side_channel.environment_initialized()
@staticmethod
def _get_communicator(worker_id, base_port, timeout_wait):
    """
    Create the communicator used to exchange messages with Unity.
    :return: An RpcCommunicator built from the worker id, base port and timeout.
    """
    communicator = RpcCommunicator(worker_id, base_port, timeout_wait)
    return communicator
def _executable_args(self) -> List[str]:
    """
    Assemble the command line arguments for the environment executable.
    Order: graphics flags (if no-graphics mode), the port argument, a default
    log file (if a log folder is set and the user did not pass -logfile),
    then the user's additional arguments.
    :return: The list of arguments.
    """
    graphics_flags = ["-nographics", "-batchmode"] if self._no_graphics else []
    port_flags = [UnityEnvironment._PORT_COMMAND_LINE_ARG, str(self._port)]
    user_args = self._additional_args
    # Only point the player at our log folder when the user has not already
    # chosen a log file (the flag is matched case-insensitively).
    user_chose_logfile = any(arg.lower() == "-logfile" for arg in user_args)
    logfile_flags: List[str] = []
    if self._log_folder and not user_chose_logfile:
        player_log = os.path.join(self._log_folder, f"Player-{self._worker_id}.log")
        logfile_flags = ["-logFile", player_log]
    return [*graphics_flags, *port_flags, *logfile_flags, *user_args]
def _update_behavior_specs(self, output: UnityOutputProto) -> None:
    """
    Record a BehaviorSpec for every brain announced in the initialization output
    that has at least one AgentInfo in the accompanying rl_output.
    :UnityOutputProto output: Message received from Unity.
    """
    for brain_param in output.rl_initialization_output.brain_parameters:
        name = brain_param.brain_name
        # The spec is derived from the brain parameters plus the observations
        # of one agent, so brains without any agent are skipped.
        infos = output.rl_output.agentInfos[name].value
        if not infos:
            continue
        self._env_specs[name] = behavior_spec_from_proto(brain_param, infos[0])
        logger.info(f"Connected new brain: {name}")
def _update_state(self, output: UnityRLOutputProto) -> None:
    """
    Refresh the stored (DecisionSteps, TerminalSteps) of every known behavior from
    the given output, then hand the side channel payload to the side channel manager.
    Behaviors absent from the output get empty steps.
    """
    for name, spec in self._env_specs.items():
        if name not in output.agentInfos:
            self._env_state[name] = (
                DecisionSteps.empty(spec),
                TerminalSteps.empty(spec),
            )
            continue
        self._env_state[name] = steps_from_proto(output.agentInfos[name].value, spec)
    self._side_channel_manager.process_side_channel_message(output.side_channel)
def reset(self) -> None:
    """
    Send a reset command to Unity and refresh behavior specs and agent state
    from the reply. Any pending actions are discarded.
    :raises UnityEnvironmentException: If no environment is loaded.
    :raises UnityCommunicatorStoppedException: If the communicator returns no output.
    """
    if not self._loaded:
        raise UnityEnvironmentException("No Unity environment is loaded.")
    response = self._communicator.exchange(
        self._generate_reset_input(), self._poll_process
    )
    if response is None:
        raise UnityCommunicatorStoppedException("Communicator has exited.")
    self._update_behavior_specs(response)
    self._update_state(response.rl_output)
    self._is_first_message = False
    self._env_actions.clear()
@timed
def step(self) -> None:
    """
    Send the pending actions to Unity and refresh behavior specs and agent state
    from the reply. The very first call performs a reset instead.
    :raises UnityEnvironmentException: If no environment is loaded.
    :raises UnityCommunicatorStoppedException: If the communicator returns no output.
    """
    if self._is_first_message:
        return self.reset()
    if not self._loaded:
        raise UnityEnvironmentException("No Unity environment is loaded.")
    # Behaviors that received no action get an empty action sized to the number
    # of agents in their current decision steps (0 if there is no state yet).
    for name, spec in self._env_specs.items():
        if name in self._env_actions:
            continue
        agent_count = len(self._env_state[name][0]) if name in self._env_state else 0
        self._env_actions[name] = spec.action_spec.empty_action(agent_count)
    request = self._generate_step_input(self._env_actions)
    with hierarchical_timer("communicator.exchange"):
        response = self._communicator.exchange(request, self._poll_process)
    if response is None:
        raise UnityCommunicatorStoppedException("Communicator has exited.")
    self._update_behavior_specs(response)
    self._update_state(response.rl_output)
    self._env_actions.clear()
@property
def behavior_specs(self) -> MappingType[str, BehaviorSpec]:
    """Behavior names mapped to their BehaviorSpec, wrapped in a BehaviorMapping."""
    known_specs = self._env_specs
    return BehaviorMapping(known_specs)
def _assert_behavior_exists(self, behavior_name: str) -> None:
if behavior_name not in self._env_specs:
raise UnityActionException(
f"The group {behavior_name} does not correspond to an existing "
f"agent group in the environment"
)
def set_actions(self, behavior_name: BehaviorName, action: ActionTuple) -> None:
    """
    Store the actions for all agents of a behavior, to be sent on the next step.
    Does nothing when the behavior has no recorded state yet.
    :raises UnityActionException: If the behavior name is unknown.
    """
    self._assert_behavior_exists(behavior_name)
    if behavior_name in self._env_state:
        spec = self._env_specs[behavior_name].action_spec
        decision_steps = self._env_state[behavior_name][0]
        # Validation is sized by the number of agents in the current decision steps.
        self._env_actions[behavior_name] = spec._validate_action(
            action, len(decision_steps), behavior_name
        )
def set_action_for_agent(
    self, behavior_name: BehaviorName, agent_id: AgentId, action: ActionTuple
) -> None:
    """
    Store the action of a single agent, to be sent on the next step.
    Does nothing when the behavior has no recorded state yet.
    :BehaviorName behavior_name: Behavior the agent belongs to.
    :AgentId agent_id: Id of the agent, as listed in the current decision steps.
    :ActionTuple action: Action for that one agent (validated for a single agent).
    :raises UnityActionException: If the behavior name is unknown.
    :raises IndexError: If the agent is not part of the current decision steps.
    """
    self._assert_behavior_exists(behavior_name)
    if behavior_name not in self._env_state:
        return
    action_spec = self._env_specs[behavior_name].action_spec
    action = action_spec._validate_action(action, 1, behavior_name)
    decision_steps = self._env_state[behavior_name][0]
    if behavior_name not in self._env_actions:
        # First action for this behavior in this step: start from an empty
        # action covering every agent in the decision steps.
        self._env_actions[behavior_name] = action_spec.empty_action(
            len(decision_steps)
        )
    try:
        index = np.where(decision_steps.agent_id == agent_id)[0][0]
    except IndexError as ie:
        raise IndexError(
            f"agent_id {agent_id} did not request a decision at the previous step"
        ) from ie
    if action_spec.continuous_size > 0:
        self._env_actions[behavior_name].continuous[index] = action.continuous[0, :]
    if action_spec.discrete_size > 0:
        self._env_actions[behavior_name].discrete[index] = action.discrete[0, :]
def get_steps(
    self, behavior_name: BehaviorName
) -> Tuple[DecisionSteps, TerminalSteps]:
    """
    Return the stored (DecisionSteps, TerminalSteps) pair of a behavior.
    :raises UnityActionException: If the behavior name is unknown.
    """
    self._assert_behavior_exists(behavior_name)
    steps = self._env_state[behavior_name]
    return steps
def _poll_process(self) -> None:
"""
Check the status of the subprocess. If it has exited, raise a UnityEnvironmentException
:return: None
"""
if not self._process:
return
poll_res = self._process.poll()
if poll_res is not None:
exc_msg = self._returncode_to_env_message(self._process.returncode)
raise UnityEnvironmentException(exc_msg)
def close(self):
    """
    Sends a shutdown signal to the unity environment, and closes the socket connection.
    :raises UnityEnvironmentException: If no environment is loaded.
    """
    if not self._loaded:
        raise UnityEnvironmentException("No Unity environment is loaded.")
    self._close()
def _close(self, timeout: Optional[int] = None) -> None:
"""
Close the communicator and environment subprocess (if necessary).
:int timeout: [Optional] Number of seconds to wait for the environment to shut down before
force-killing it. Defaults to `self.timeout_wait`.
"""
if timeout is None:
timeout = self._timeout_wait
self._loaded = False
self._communicator.close()
if self._process is not None:
# Wait a bit for the process to shutdown, but kill it if it takes too long
try:
self._process.wait(timeout=timeout)
logger.debug(self._returncode_to_env_message(self._process.returncode))
except subprocess.TimeoutExpired:
logger.warning("Environment timed out shutting down. Killing...")
self._process.kill()
# Set to None so we don't try to close multiple times.
self._process = None
@timed
def _generate_step_input(
    self, vector_action: Dict[str, ActionTuple]
) -> UnityInputProto:
    """
    Build the STEP message carrying one AgentActionProto per agent and behavior,
    plus the pending side channel payload.
    :dict vector_action: Actions to send, keyed by behavior name.
    :return: The UnityInputProto wrapping the step request.
    """
    step_request = UnityRLInputProto()
    for behavior, actions in vector_action.items():
        agent_count = len(self._env_state[behavior][0])
        if agent_count == 0:
            continue
        for agent_index in range(agent_count):
            agent_action = AgentActionProto()
            # The deprecated field receives the continuous values followed by
            # the discrete ones, alongside the dedicated fields.
            if actions.continuous is not None:
                continuous_row = actions.continuous[agent_index]
                agent_action.vector_actions_deprecated.extend(continuous_row)
                agent_action.continuous_actions.extend(continuous_row)
            if actions.discrete is not None:
                discrete_row = actions.discrete[agent_index]
                agent_action.vector_actions_deprecated.extend(discrete_row)
                agent_action.discrete_actions.extend(discrete_row)
            step_request.agent_actions[behavior].value.extend([agent_action])
    step_request.command = STEP
    step_request.side_channel = bytes(
        self._side_channel_manager.generate_side_channel_messages()
    )
    return self._wrap_unity_input(step_request)
def _generate_reset_input(self) -> UnityInputProto:
    """
    Build the RESET message, including the pending side channel payload.
    :return: The UnityInputProto wrapping the reset request.
    """
    reset_request = UnityRLInputProto()
    reset_request.command = RESET
    side_channel_payload = self._side_channel_manager.generate_side_channel_messages()
    reset_request.side_channel = bytes(side_channel_payload)
    return self._wrap_unity_input(reset_request)
def _send_academy_parameters(
    self, init_parameters: UnityRLInitializationInputProto
) -> UnityOutputProto:
    """
    Send the initialization parameters through the communicator.
    :UnityRLInitializationInputProto init_parameters: Parameters to copy into the request.
    :return: Whatever the communicator's initialize call returns.
    """
    request = UnityInputProto()
    request.rl_initialization_input.CopyFrom(init_parameters)
    response = self._communicator.initialize(request, self._poll_process)
    return response
@staticmethod
def _wrap_unity_input(rl_input: UnityRLInputProto) -> UnityInputProto:
    """
    Wrap an RL input message in a new UnityInputProto.
    :UnityRLInputProto rl_input: Message copied into the wrapper's rl_input field.
    :return: The new UnityInputProto.
    """
    wrapper = UnityInputProto()
    wrapper.rl_input.CopyFrom(rl_input)
    return wrapper
@staticmethod
def _returncode_to_signal_name(returncode: int) -> Optional[str]:
"""
Try to convert return codes into their corresponding signal name.
E.g. returncode_to_signal_name(-2) -> "SIGINT"
"""
try:
# A negative value -N indicates that the child was terminated by signal N (POSIX only).
s = signal.Signals(-returncode)
return s.name
except Exception:
# Should generally be a ValueError, but catch everything just in case.
return None
@staticmethod
def _returncode_to_env_message(returncode: int) -> str:
    """
    Format the shutdown message for a process return code.
    :int returncode: Return code of the environment process.
    :return: The message, with the signal name in parentheses when one is found.
    """
    name = UnityEnvironment._returncode_to_signal_name(returncode)
    suffix = f" ({name})" if name else ""
    return f"Environment shut down with return code {returncode}{suffix}."
|