Upload 280 files
This view is limited to 50 files because it contains too many changes.
- MLPY/Lib/site-packages/mlagents/__init__.py +0 -0
- MLPY/Lib/site-packages/mlagents/__pycache__/__init__.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/plugins/__init__.py +8 -0
- MLPY/Lib/site-packages/mlagents/plugins/__pycache__/__init__.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/plugins/__pycache__/stats_writer.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/plugins/__pycache__/trainer_type.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/plugins/stats_writer.py +72 -0
- MLPY/Lib/site-packages/mlagents/plugins/trainer_type.py +80 -0
- MLPY/Lib/site-packages/mlagents/torch_utils/__init__.py +4 -0
- MLPY/Lib/site-packages/mlagents/torch_utils/__pycache__/__init__.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/torch_utils/__pycache__/cpu_utils.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/torch_utils/__pycache__/globals.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/torch_utils/__pycache__/torch.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/torch_utils/cpu_utils.py +41 -0
- MLPY/Lib/site-packages/mlagents/torch_utils/globals.py +13 -0
- MLPY/Lib/site-packages/mlagents/torch_utils/torch.py +68 -0
- MLPY/Lib/site-packages/mlagents/trainers/__init__.py +5 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/__init__.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/action_info.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/agent_processor.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/behavior_id_utils.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/buffer.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/cli_utils.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/demo_loader.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/directory_utils.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/env_manager.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/environment_parameter_manager.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/exception.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/learn.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/run_experiment.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/settings.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/simple_env_manager.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/stats.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/subprocess_env_manager.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/trainer_controller.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/training_analytics_side_channel.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/training_status.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/trajectory.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/__pycache__/upgrade_config.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/mlagents/trainers/action_info.py +25 -0
- MLPY/Lib/site-packages/mlagents/trainers/agent_processor.py +469 -0
- MLPY/Lib/site-packages/mlagents/trainers/behavior_id_utils.py +64 -0
- MLPY/Lib/site-packages/mlagents/trainers/buffer.py +521 -0
- MLPY/Lib/site-packages/mlagents/trainers/cli_utils.py +331 -0
- MLPY/Lib/site-packages/mlagents/trainers/demo_loader.py +246 -0
- MLPY/Lib/site-packages/mlagents/trainers/directory_utils.py +76 -0
- MLPY/Lib/site-packages/mlagents/trainers/env_manager.py +157 -0
- MLPY/Lib/site-packages/mlagents/trainers/environment_parameter_manager.py +186 -0
- MLPY/Lib/site-packages/mlagents/trainers/exception.py +75 -0
- MLPY/Lib/site-packages/mlagents/trainers/ghost/__init__.py +0 -0
MLPY/Lib/site-packages/mlagents/__init__.py
ADDED
File without changes
MLPY/Lib/site-packages/mlagents/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (145 Bytes).
MLPY/Lib/site-packages/mlagents/plugins/__init__.py
ADDED
@@ -0,0 +1,8 @@
+from typing import Dict, Any
+
+ML_AGENTS_STATS_WRITER = "mlagents.stats_writer"
+ML_AGENTS_TRAINER_TYPE = "mlagents.trainer_type"
+
+# TODO: the real type is Dict[str, HyperparamSettings]
+all_trainer_types: Dict[str, Any] = {}
+all_trainer_settings: Dict[str, Any] = {}
MLPY/Lib/site-packages/mlagents/plugins/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (427 Bytes).
MLPY/Lib/site-packages/mlagents/plugins/__pycache__/stats_writer.cpython-39.pyc
ADDED
Binary file (2.2 kB).
MLPY/Lib/site-packages/mlagents/plugins/__pycache__/trainer_type.cpython-39.pyc
ADDED
Binary file (2.42 kB).
MLPY/Lib/site-packages/mlagents/plugins/stats_writer.py
ADDED
@@ -0,0 +1,72 @@
+import sys
+from typing import List
+
+# importlib.metadata is new in python3.8
+# We use the backport for older python versions.
+if sys.version_info < (3, 8):
+    import importlib_metadata
+else:
+    import importlib.metadata as importlib_metadata  # pylint: disable=E0611
+
+from mlagents.trainers.stats import StatsWriter
+
+from mlagents_envs import logging_util
+from mlagents.plugins import ML_AGENTS_STATS_WRITER
+from mlagents.trainers.settings import RunOptions
+from mlagents.trainers.stats import TensorboardWriter, GaugeWriter, ConsoleWriter
+
+
+logger = logging_util.get_logger(__name__)
+
+
+def get_default_stats_writers(run_options: RunOptions) -> List[StatsWriter]:
+    """
+    The StatsWriters that mlagents-learn always uses:
+    * A TensorboardWriter to write information to TensorBoard
+    * A GaugeWriter to record our internal stats
+    * A ConsoleWriter to output to stdout.
+    """
+    checkpoint_settings = run_options.checkpoint_settings
+    return [
+        TensorboardWriter(
+            checkpoint_settings.write_path,
+            clear_past_data=not checkpoint_settings.resume,
+            hidden_keys=["Is Training", "Step"],
+        ),
+        GaugeWriter(),
+        ConsoleWriter(),
+    ]
+
+
+def register_stats_writer_plugins(run_options: RunOptions) -> List[StatsWriter]:
+    """
+    Registers all StatsWriter plugins (including the default one),
+    and evaluates them, and returns the list of all the StatsWriter implementations.
+    """
+    all_stats_writers: List[StatsWriter] = []
+    if ML_AGENTS_STATS_WRITER not in importlib_metadata.entry_points():
+        logger.warning(
+            f"Unable to find any entry points for {ML_AGENTS_STATS_WRITER}, even the default ones. "
+            "Uninstalling and reinstalling ml-agents via pip should resolve. "
+            "Using default plugins for now."
+        )
+        return get_default_stats_writers(run_options)
+
+    entry_points = importlib_metadata.entry_points()[ML_AGENTS_STATS_WRITER]
+
+    for entry_point in entry_points:
+
+        try:
+            logger.debug(f"Initializing StatsWriter plugins: {entry_point.name}")
+            plugin_func = entry_point.load()
+            plugin_stats_writers = plugin_func(run_options)
+            logger.debug(
+                f"Found {len(plugin_stats_writers)} StatsWriters for plugin {entry_point.name}"
+            )
+            all_stats_writers += plugin_stats_writers
+        except BaseException:
+            # Catch all exceptions from setting up the plugin, so that bad user code doesn't break things.
+            logger.exception(
+                f"Error initializing StatsWriter plugins for {entry_point.name}. This plugin will not be used."
+            )
+    return all_stats_writers
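Note: each entry point registered under mlagents.stats_writer is expected to load to a callable that takes the RunOptions and returns a list of StatsWriter instances, exactly as get_default_stats_writers does above. A minimal plugin function might look like the sketch below; the function name is hypothetical, and ConsoleWriter is reused from mlagents.trainers.stats purely as an example.

from typing import List
from mlagents.trainers.settings import RunOptions
from mlagents.trainers.stats import ConsoleWriter, StatsWriter

def get_example_stats_writers(run_options: RunOptions) -> List[StatsWriter]:
    # Return any number of StatsWriter implementations; register_stats_writer_plugins
    # appends them to the writers collected from the other entry points.
    return [ConsoleWriter()]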
MLPY/Lib/site-packages/mlagents/plugins/trainer_type.py
ADDED
@@ -0,0 +1,80 @@
+import sys
+from typing import Dict, Tuple, Any
+
+# importlib.metadata is new in python3.8
+# We use the backport for older python versions.
+if sys.version_info < (3, 8):
+    import importlib_metadata
+else:
+    import importlib.metadata as importlib_metadata  # pylint: disable=E0611
+
+
+from mlagents_envs import logging_util
+from mlagents.plugins import ML_AGENTS_TRAINER_TYPE
+from mlagents.trainers.ppo.trainer import PPOTrainer
+from mlagents.trainers.sac.trainer import SACTrainer
+from mlagents.trainers.poca.trainer import POCATrainer
+from mlagents.trainers.ppo.optimizer_torch import PPOSettings
+from mlagents.trainers.sac.optimizer_torch import SACSettings
+from mlagents.trainers.poca.optimizer_torch import POCASettings
+from mlagents import plugins as mla_plugins
+
+logger = logging_util.get_logger(__name__)
+
+
+def get_default_trainer_types() -> Tuple[Dict[str, Any], Dict[str, Any]]:
+    """
+    The Trainers that mlagents-learn always uses:
+    """
+
+    mla_plugins.all_trainer_types.update(
+        {
+            PPOTrainer.get_trainer_name(): PPOTrainer,
+            SACTrainer.get_trainer_name(): SACTrainer,
+            POCATrainer.get_trainer_name(): POCATrainer,
+        }
+    )
+    # global all_trainer_settings
+    mla_plugins.all_trainer_settings.update(
+        {
+            PPOTrainer.get_trainer_name(): PPOSettings,
+            SACTrainer.get_trainer_name(): SACSettings,
+            POCATrainer.get_trainer_name(): POCASettings,
+        }
+    )
+
+    return mla_plugins.all_trainer_types, mla_plugins.all_trainer_settings
+
+
+def register_trainer_plugins() -> Tuple[Dict[str, Any], Dict[str, Any]]:
+    """
+    Registers all Trainer plugins (including the default one),
+    and evaluates them, and returns the list of all the Trainer implementations.
+    """
+    if ML_AGENTS_TRAINER_TYPE not in importlib_metadata.entry_points():
+        logger.warning(
+            f"Unable to find any entry points for {ML_AGENTS_TRAINER_TYPE}, even the default ones. "
+            "Uninstalling and reinstalling ml-agents via pip should resolve. "
+            "Using default plugins for now."
+        )
+        return get_default_trainer_types()
+
+    entry_points = importlib_metadata.entry_points()[ML_AGENTS_TRAINER_TYPE]
+
+    for entry_point in entry_points:
+
+        try:
+            logger.debug(f"Initializing Trainer plugins: {entry_point.name}")
+            plugin_func = entry_point.load()
+            plugin_trainer_types, plugin_trainer_settings = plugin_func()
+            logger.debug(
+                f"Found {len(plugin_trainer_types)} Trainers for plugin {entry_point.name}"
+            )
+            mla_plugins.all_trainer_types.update(plugin_trainer_types)
+            mla_plugins.all_trainer_settings.update(plugin_trainer_settings)
+        except BaseException:
+            # Catch all exceptions from setting up the plugin, so that bad user code doesn't break things.
+            logger.exception(
+                f"Error initializing Trainer plugins for {entry_point.name}. This plugin will not be used."
+            )
+    return mla_plugins.all_trainer_types, mla_plugins.all_trainer_settings
MLPY/Lib/site-packages/mlagents/torch_utils/__init__.py
ADDED
@@ -0,0 +1,4 @@
+from mlagents.torch_utils.torch import torch as torch  # noqa
+from mlagents.torch_utils.torch import nn  # noqa
+from mlagents.torch_utils.torch import set_torch_config  # noqa
+from mlagents.torch_utils.torch import default_device  # noqa
MLPY/Lib/site-packages/mlagents/torch_utils/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (314 Bytes).
MLPY/Lib/site-packages/mlagents/torch_utils/__pycache__/cpu_utils.cpython-39.pyc
ADDED
Binary file (1.51 kB).
MLPY/Lib/site-packages/mlagents/torch_utils/__pycache__/globals.cpython-39.pyc
ADDED
Binary file (576 Bytes).
MLPY/Lib/site-packages/mlagents/torch_utils/__pycache__/torch.cpython-39.pyc
ADDED
Binary file (1.64 kB).
MLPY/Lib/site-packages/mlagents/torch_utils/cpu_utils.py
ADDED
@@ -0,0 +1,41 @@
+from typing import Optional
+
+import os
+
+
+def get_num_threads_to_use() -> Optional[int]:
+    """
+    Gets the number of threads to use. For most problems, 4 is all you
+    need, but for smaller machines, we'd like to scale to less than that.
+    By default, PyTorch uses 1/2 of the available cores.
+    """
+    num_cpus = _get_num_available_cpus()
+    return max(min(num_cpus // 2, 4), 1) if num_cpus is not None else None
+
+
+def _get_num_available_cpus() -> Optional[int]:
+    """
+    Returns number of CPUs using cgroups if possible. This accounts
+    for Docker containers that are limited in cores.
+    """
+    period = _read_in_integer_file("/sys/fs/cgroup/cpu/cpu.cfs_period_us")
+    quota = _read_in_integer_file("/sys/fs/cgroup/cpu/cpu.cfs_quota_us")
+    share = _read_in_integer_file("/sys/fs/cgroup/cpu/cpu.shares")
+    is_kubernetes = os.getenv("KUBERNETES_SERVICE_HOST") is not None
+
+    if period > 0 and quota > 0:
+        return int(quota // period)
+    elif period > 0 and share > 0 and is_kubernetes:
+        # In kubernetes, each requested CPU is 1024 CPU shares
+        # https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#how-pods-with-resource-limits-are-run
+        return int(share // 1024)
+    else:
+        return os.cpu_count()
+
+
+def _read_in_integer_file(filename: str) -> int:
+    try:
+        with open(filename) as f:
+            return int(f.read().rstrip())
+    except FileNotFoundError:
+        return -1
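Worked example of the logic above: in a container with cpu.cfs_quota_us = 400000 and cpu.cfs_period_us = 100000, _get_num_available_cpus returns 4, so get_num_threads_to_use returns max(min(4 // 2, 4), 1) = 2 threads; on an unrestricted 16-core host it returns min(16 // 2, 4) = 4.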
MLPY/Lib/site-packages/mlagents/torch_utils/globals.py
ADDED
@@ -0,0 +1,13 @@
+from typing import Optional
+
+_rank: Optional[int] = None
+
+
+def get_rank() -> Optional[int]:
+    """
+    Returns the rank (in the MPI sense) of the current node.
+    For local training, this will always be None.
+    If this needs to be used, it should be done from outside ml-agents.
+    :return:
+    """
+    return _rank
MLPY/Lib/site-packages/mlagents/torch_utils/torch.py
ADDED
@@ -0,0 +1,68 @@
+import os
+
+from distutils.version import LooseVersion
+import pkg_resources
+from mlagents.torch_utils import cpu_utils
+from mlagents.trainers.settings import TorchSettings
+from mlagents_envs.logging_util import get_logger
+
+
+logger = get_logger(__name__)
+
+
+def assert_torch_installed():
+    # Check that torch version 1.6.0 or later has been installed. If not, refer
+    # user to the PyTorch webpage for install instructions.
+    torch_pkg = None
+    try:
+        torch_pkg = pkg_resources.get_distribution("torch")
+    except pkg_resources.DistributionNotFound:
+        pass
+    assert torch_pkg is not None and LooseVersion(torch_pkg.version) >= LooseVersion(
+        "1.6.0"
+    ), (
+        "A compatible version of PyTorch was not installed. Please visit the PyTorch homepage "
+        + "(https://pytorch.org/get-started/locally/) and follow the instructions to install. "
+        + "Version 1.6.0 and later are supported."
+    )
+
+
+assert_torch_installed()
+
+# This should be the only place that we import torch directly.
+# Everywhere else is caught by the banned-modules setting for flake8
+import torch  # noqa I201
+
+
+torch.set_num_threads(cpu_utils.get_num_threads_to_use())
+os.environ["KMP_BLOCKTIME"] = "0"
+
+
+_device = torch.device("cpu")
+
+
+def set_torch_config(torch_settings: TorchSettings) -> None:
+    global _device
+
+    if torch_settings.device is None:
+        device_str = "cuda" if torch.cuda.is_available() else "cpu"
+    else:
+        device_str = torch_settings.device
+
+    _device = torch.device(device_str)
+
+    if _device.type == "cuda":
+        torch.set_default_tensor_type(torch.cuda.FloatTensor)
+    else:
+        torch.set_default_tensor_type(torch.FloatTensor)
+    logger.debug(f"default Torch device: {_device}")
+
+
+# Initialize to default settings
+set_torch_config(TorchSettings(device=None))
+
+nn = torch.nn
+
+
+def default_device():
+    return _device
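Note: the module above picks a device once at import time and again whenever set_torch_config is called. A minimal sketch of overriding it, assuming a CUDA-enabled PyTorch build is actually installed:

from mlagents.trainers.settings import TorchSettings
from mlagents.torch_utils import set_torch_config, default_device

set_torch_config(TorchSettings(device="cuda:0"))  # assumes CUDA is available
print(default_device())  # -> device(type='cuda', index=0)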
MLPY/Lib/site-packages/mlagents/trainers/__init__.py
ADDED
@@ -0,0 +1,5 @@
+# Version of the library that will be used to upload to pypi
+__version__ = "0.30.0"
+
+# Git tag that will be checked to determine whether to trigger upload to pypi
+__release_tag__ = "release_20"
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (211 Bytes).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/action_info.cpython-39.pyc
ADDED
Binary file (1.27 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/agent_processor.cpython-39.pyc
ADDED
Binary file (14.1 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/behavior_id_utils.cpython-39.pyc
ADDED
Binary file (2.62 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/buffer.cpython-39.pyc
ADDED
Binary file (18.3 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/cli_utils.cpython-39.pyc
ADDED
Binary file (9.91 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/demo_loader.cpython-39.pyc
ADDED
Binary file (6.44 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/directory_utils.cpython-39.pyc
ADDED
Binary file (2.68 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/env_manager.cpython-39.pyc
ADDED
Binary file (5.44 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/environment_parameter_manager.cpython-39.pyc
ADDED
Binary file (6.54 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/exception.cpython-39.pyc
ADDED
Binary file (2.16 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/learn.cpython-39.pyc
ADDED
Binary file (8.42 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/run_experiment.cpython-39.pyc
ADDED
Binary file (1.31 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/settings.cpython-39.pyc
ADDED
Binary file (32.6 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/simple_env_manager.cpython-39.pyc
ADDED
Binary file (3.54 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/stats.cpython-39.pyc
ADDED
Binary file (14.9 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/subprocess_env_manager.cpython-39.pyc
ADDED
Binary file (16.4 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/trainer_controller.cpython-39.pyc
ADDED
Binary file (9.53 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/training_analytics_side_channel.cpython-39.pyc
ADDED
Binary file (6.14 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/training_status.cpython-39.pyc
ADDED
Binary file (5.09 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/trajectory.cpython-39.pyc
ADDED
Binary file (8.41 kB).
MLPY/Lib/site-packages/mlagents/trainers/__pycache__/upgrade_config.cpython-39.pyc
ADDED
Binary file (5.8 kB).
MLPY/Lib/site-packages/mlagents/trainers/action_info.py
ADDED
@@ -0,0 +1,25 @@
+from typing import NamedTuple, Any, Dict, List
+import numpy as np
+from mlagents_envs.base_env import AgentId
+
+ActionInfoOutputs = Dict[str, np.ndarray]
+
+
+class ActionInfo(NamedTuple):
+    """
+    A NamedTuple containing actions and related quantities to the policy forward
+    pass. Additionally contains the agent ids in the corresponding DecisionStep
+    :param action: The action output of the policy
+    :param env_action: The possibly clipped action to be executed in the environment
+    :param outputs: Dict of all quantities associated with the policy forward pass
+    :param agent_ids: List of int agent ids in DecisionStep
+    """
+
+    action: Any
+    env_action: Any
+    outputs: ActionInfoOutputs
+    agent_ids: List[AgentId]
+
+    @staticmethod
+    def empty() -> "ActionInfo":
+        return ActionInfo([], [], {}, [])
MLPY/Lib/site-packages/mlagents/trainers/agent_processor.py
ADDED
@@ -0,0 +1,469 @@
+import sys
+import numpy as np
+from typing import List, Dict, TypeVar, Generic, Tuple, Any, Union
+from collections import defaultdict, Counter
+import queue
+from mlagents.torch_utils import torch
+
+from mlagents_envs.base_env import (
+    ActionTuple,
+    DecisionSteps,
+    DecisionStep,
+    TerminalSteps,
+    TerminalStep,
+)
+from mlagents_envs.side_channel.stats_side_channel import (
+    StatsAggregationMethod,
+    EnvironmentStats,
+)
+from mlagents.trainers.exception import UnityTrainerException
+from mlagents.trainers.trajectory import AgentStatus, Trajectory, AgentExperience
+from mlagents.trainers.policy import Policy
+from mlagents.trainers.action_info import ActionInfo, ActionInfoOutputs
+from mlagents.trainers.stats import StatsReporter
+from mlagents.trainers.behavior_id_utils import (
+    get_global_agent_id,
+    get_global_group_id,
+    GlobalAgentId,
+    GlobalGroupId,
+)
+from mlagents.trainers.torch_entities.action_log_probs import LogProbsTuple
+from mlagents.trainers.torch_entities.utils import ModelUtils
+
+T = TypeVar("T")
+
+
+class AgentProcessor:
+    """
+    AgentProcessor contains a dictionary per-agent trajectory buffers. The buffers are indexed by agent_id.
+    Buffer also contains an update_buffer that corresponds to the buffer used when updating the model.
+    One AgentProcessor should be created per agent group.
+    """
+
+    def __init__(
+        self,
+        policy: Policy,
+        behavior_id: str,
+        stats_reporter: StatsReporter,
+        max_trajectory_length: int = sys.maxsize,
+    ):
+        """
+        Create an AgentProcessor.
+
+        :param trainer: Trainer instance connected to this AgentProcessor. Trainer is given trajectory
+        when it is finished.
+        :param policy: Policy instance associated with this AgentProcessor.
+        :param max_trajectory_length: Maximum length of a trajectory before it is added to the trainer.
+        :param stats_category: The category under which to write the stats. Usually, this comes from the Trainer.
+        """
+        self._experience_buffers: Dict[
+            GlobalAgentId, List[AgentExperience]
+        ] = defaultdict(list)
+        self._last_step_result: Dict[GlobalAgentId, Tuple[DecisionStep, int]] = {}
+        # current_group_obs is used to collect the current (i.e. the most recently seen)
+        # obs of all the agents in the same group, and assemble the group obs.
+        # It is a dictionary of GlobalGroupId to dictionaries of GlobalAgentId to observation.
+        self._current_group_obs: Dict[
+            GlobalGroupId, Dict[GlobalAgentId, List[np.ndarray]]
+        ] = defaultdict(lambda: defaultdict(list))
+        # group_status is used to collect the current, most recently seen
+        # group status of all the agents in the same group, and assemble the group's status.
+        # It is a dictionary of GlobalGroupId to dictionaries of GlobalAgentId to AgentStatus.
+        self._group_status: Dict[
+            GlobalGroupId, Dict[GlobalAgentId, AgentStatus]
+        ] = defaultdict(lambda: defaultdict(None))
+        # last_take_action_outputs stores the action a_t taken before the current observation s_(t+1), while
+        # grabbing previous_action from the policy grabs the action PRIOR to that, a_(t-1).
+        self._last_take_action_outputs: Dict[GlobalAgentId, ActionInfoOutputs] = {}
+
+        self._episode_steps: Counter = Counter()
+        self._episode_rewards: Dict[GlobalAgentId, float] = defaultdict(float)
+        self._stats_reporter = stats_reporter
+        self._max_trajectory_length = max_trajectory_length
+        self._trajectory_queues: List[AgentManagerQueue[Trajectory]] = []
+        self._behavior_id = behavior_id
+
+        # Note: In the future this policy reference will be the policy of the env_manager and not the trainer.
+        # We can in that case just grab the action from the policy rather than having it passed in.
+        self.policy = policy
+
+    def add_experiences(
+        self,
+        decision_steps: DecisionSteps,
+        terminal_steps: TerminalSteps,
+        worker_id: int,
+        previous_action: ActionInfo,
+    ) -> None:
+        """
+        Adds experiences to each agent's experience history.
+        :param decision_steps: current DecisionSteps.
+        :param terminal_steps: current TerminalSteps.
+        :param previous_action: The outputs of the Policy's get_action method.
+        """
+        take_action_outputs = previous_action.outputs
+        if take_action_outputs:
+            try:
+                for _entropy in take_action_outputs["entropy"]:
+                    if isinstance(_entropy, torch.Tensor):
+                        _entropy = ModelUtils.to_numpy(_entropy)
+                    self._stats_reporter.add_stat("Policy/Entropy", _entropy)
+            except KeyError:
+                pass
+
+        # Make unique agent_ids that are global across workers
+        action_global_agent_ids = [
+            get_global_agent_id(worker_id, ag_id) for ag_id in previous_action.agent_ids
+        ]
+        for global_id in action_global_agent_ids:
+            if global_id in self._last_step_result:  # Don't store if agent just reset
+                self._last_take_action_outputs[global_id] = take_action_outputs
+
+        # Iterate over all the terminal steps, first gather all the group obs
+        # and then create the AgentExperiences/Trajectories. _add_to_group_status
+        # stores Group statuses in a common data structure self.group_status
+        for terminal_step in terminal_steps.values():
+            self._add_group_status_and_obs(terminal_step, worker_id)
+        for terminal_step in terminal_steps.values():
+            local_id = terminal_step.agent_id
+            global_id = get_global_agent_id(worker_id, local_id)
+            self._process_step(
+                terminal_step, worker_id, terminal_steps.agent_id_to_index[local_id]
+            )
+
+        # Iterate over all the decision steps, first gather all the group obs
+        # and then create the trajectories. _add_to_group_status
+        # stores Group statuses in a common data structure self.group_status
+        for ongoing_step in decision_steps.values():
+            self._add_group_status_and_obs(ongoing_step, worker_id)
+        for ongoing_step in decision_steps.values():
+            local_id = ongoing_step.agent_id
+            self._process_step(
+                ongoing_step, worker_id, decision_steps.agent_id_to_index[local_id]
+            )
+        # Clear the last seen group obs when agents die, but only after all of the group
+        # statuses were added to the trajectory.
+        for terminal_step in terminal_steps.values():
+            local_id = terminal_step.agent_id
+            global_id = get_global_agent_id(worker_id, local_id)
+            self._clear_group_status_and_obs(global_id)
+
+        for _gid in action_global_agent_ids:
+            # If the ID doesn't have a last step result, the agent just reset,
+            # don't store the action.
+            if _gid in self._last_step_result:
+                if "action" in take_action_outputs:
+                    self.policy.save_previous_action(
+                        [_gid], take_action_outputs["action"]
+                    )
+
+    def _add_group_status_and_obs(
+        self, step: Union[TerminalStep, DecisionStep], worker_id: int
+    ) -> None:
+        """
+        Takes a TerminalStep or DecisionStep and adds the information in it
+        to self.group_status. This information can then be retrieved
+        when constructing trajectories to get the status of group mates. Also stores the current
+        observation into current_group_obs, to be used to get the next group observations
+        for bootstrapping.
+        :param step: TerminalStep or DecisionStep
+        :param worker_id: Worker ID of this particular environment. Used to generate a
+        global group id.
+        """
+        global_agent_id = get_global_agent_id(worker_id, step.agent_id)
+        stored_decision_step, idx = self._last_step_result.get(
+            global_agent_id, (None, None)
+        )
+        stored_take_action_outputs = self._last_take_action_outputs.get(
+            global_agent_id, None
+        )
+        if stored_decision_step is not None and stored_take_action_outputs is not None:
+            # 0, the default group_id, means that the agent doesn't belong to an agent group.
+            # If 0, don't add any groupmate information.
+            if step.group_id > 0:
+                global_group_id = get_global_group_id(worker_id, step.group_id)
+                stored_actions = stored_take_action_outputs["action"]
+                action_tuple = ActionTuple(
+                    continuous=stored_actions.continuous[idx],
+                    discrete=stored_actions.discrete[idx],
+                )
+                group_status = AgentStatus(
+                    obs=stored_decision_step.obs,
+                    reward=step.reward,
+                    action=action_tuple,
+                    done=isinstance(step, TerminalStep),
+                )
+                self._group_status[global_group_id][global_agent_id] = group_status
+                self._current_group_obs[global_group_id][global_agent_id] = step.obs
+
+    def _clear_group_status_and_obs(self, global_id: GlobalAgentId) -> None:
+        """
+        Clears an agent from self._group_status and self._current_group_obs.
+        """
+        self._delete_in_nested_dict(self._current_group_obs, global_id)
+        self._delete_in_nested_dict(self._group_status, global_id)
+
+    def _delete_in_nested_dict(self, nested_dict: Dict[str, Any], key: str) -> None:
+        for _manager_id in list(nested_dict.keys()):
+            _team_group = nested_dict[_manager_id]
+            self._safe_delete(_team_group, key)
+            if not _team_group:  # if dict is empty
+                self._safe_delete(nested_dict, _manager_id)
+
+    def _process_step(
+        self, step: Union[TerminalStep, DecisionStep], worker_id: int, index: int
+    ) -> None:
+        terminated = isinstance(step, TerminalStep)
+        global_agent_id = get_global_agent_id(worker_id, step.agent_id)
+        global_group_id = get_global_group_id(worker_id, step.group_id)
+        stored_decision_step, idx = self._last_step_result.get(
+            global_agent_id, (None, None)
+        )
+        stored_take_action_outputs = self._last_take_action_outputs.get(
+            global_agent_id, None
+        )
+        if not terminated:
+            # Index is needed to grab from last_take_action_outputs
+            self._last_step_result[global_agent_id] = (step, index)
+
+        # This state is the consequence of a past action
+        if stored_decision_step is not None and stored_take_action_outputs is not None:
+            obs = stored_decision_step.obs
+            if self.policy.use_recurrent:
+                memory = self.policy.retrieve_previous_memories([global_agent_id])[0, :]
+            else:
+                memory = None
+            done = terminated  # Since this is an ongoing step
+            interrupted = step.interrupted if terminated else False
+            # Add the outputs of the last eval
+            stored_actions = stored_take_action_outputs["action"]
+            action_tuple = ActionTuple(
+                continuous=stored_actions.continuous[idx],
+                discrete=stored_actions.discrete[idx],
+            )
+            try:
+                stored_action_probs = stored_take_action_outputs["log_probs"]
+                if not isinstance(stored_action_probs, LogProbsTuple):
+                    stored_action_probs = stored_action_probs.to_log_probs_tuple()
+                log_probs_tuple = LogProbsTuple(
+                    continuous=stored_action_probs.continuous[idx],
+                    discrete=stored_action_probs.discrete[idx],
+                )
+            except KeyError:
+                log_probs_tuple = LogProbsTuple.empty_log_probs()
+
+            action_mask = stored_decision_step.action_mask
+            prev_action = self.policy.retrieve_previous_action([global_agent_id])[0, :]
+
+            # Assemble teammate_obs. If none saved, then it will be an empty list.
+            group_statuses = []
+            for _id, _mate_status in self._group_status[global_group_id].items():
+                if _id != global_agent_id:
+                    group_statuses.append(_mate_status)
+
+            experience = AgentExperience(
+                obs=obs,
+                reward=step.reward,
+                done=done,
+                action=action_tuple,
+                action_probs=log_probs_tuple,
+                action_mask=action_mask,
+                prev_action=prev_action,
+                interrupted=interrupted,
+                memory=memory,
+                group_status=group_statuses,
+                group_reward=step.group_reward,
+            )
+            # Add the value outputs if needed
+            self._experience_buffers[global_agent_id].append(experience)
+            self._episode_rewards[global_agent_id] += step.reward
+            if not terminated:
+                self._episode_steps[global_agent_id] += 1
+
+            # Add a trajectory segment to the buffer if terminal or the length has reached the time horizon
+            if (
+                len(self._experience_buffers[global_agent_id])
+                >= self._max_trajectory_length
+                or terminated
+            ):
+                next_obs = step.obs
+                next_group_obs = []
+                for _id, _obs in self._current_group_obs[global_group_id].items():
+                    if _id != global_agent_id:
+                        next_group_obs.append(_obs)
+
+                trajectory = Trajectory(
+                    steps=self._experience_buffers[global_agent_id],
+                    agent_id=global_agent_id,
+                    next_obs=next_obs,
+                    next_group_obs=next_group_obs,
+                    behavior_id=self._behavior_id,
+                )
+                for traj_queue in self._trajectory_queues:
+                    traj_queue.put(trajectory)
+                self._experience_buffers[global_agent_id] = []
+            if terminated:
+                # Record episode length.
+                self._stats_reporter.add_stat(
+                    "Environment/Episode Length",
+                    self._episode_steps.get(global_agent_id, 0),
+                )
+                self._clean_agent_data(global_agent_id)
+
+    def _clean_agent_data(self, global_id: GlobalAgentId) -> None:
+        """
+        Removes the data for an Agent.
+        """
+        self._safe_delete(self._experience_buffers, global_id)
+        self._safe_delete(self._last_take_action_outputs, global_id)
+        self._safe_delete(self._last_step_result, global_id)
+        self._safe_delete(self._episode_steps, global_id)
+        self._safe_delete(self._episode_rewards, global_id)
+        self.policy.remove_previous_action([global_id])
+        self.policy.remove_memories([global_id])
+
+    def _safe_delete(self, my_dictionary: Dict[Any, Any], key: Any) -> None:
+        """
+        Safe removes data from a dictionary. If not found,
+        don't delete.
+        """
+        if key in my_dictionary:
+            del my_dictionary[key]
+
+    def publish_trajectory_queue(
+        self, trajectory_queue: "AgentManagerQueue[Trajectory]"
+    ) -> None:
+        """
+        Adds a trajectory queue to the list of queues to publish to when this AgentProcessor
+        assembles a Trajectory
+        :param trajectory_queue: Trajectory queue to publish to.
+        """
+        self._trajectory_queues.append(trajectory_queue)
+
+    def end_episode(self) -> None:
+        """
+        Ends the episode, terminating the current trajectory and stopping stats collection for that
+        episode. Used for forceful reset (e.g. in curriculum or generalization training.)
+        """
+        all_gids = list(self._experience_buffers.keys())  # Need to make copy
+        for _gid in all_gids:
+            self._clean_agent_data(_gid)
+
+
+class AgentManagerQueue(Generic[T]):
+    """
+    Queue used by the AgentManager. Note that we make our own class here because in most implementations
+    deque is sufficient and faster. However, if we want to switch to multiprocessing, we'll need to change
+    out this implementation.
+    """
+
+    class Empty(Exception):
+        """
+        Exception for when the queue is empty.
+        """
+
+        pass
+
+    def __init__(self, behavior_id: str, maxlen: int = 0):
+        """
+        Initializes an AgentManagerQueue. Note that we can give it a behavior_id so that it can be identified
+        separately from an AgentManager.
+        """
+        self._maxlen: int = maxlen
+        self._queue: queue.Queue = queue.Queue(maxsize=maxlen)
+        self._behavior_id = behavior_id
+
+    @property
+    def maxlen(self):
+        """
+        The maximum length of the queue.
+        :return: Maximum length of the queue.
+        """
+        return self._maxlen
+
+    @property
+    def behavior_id(self):
+        """
+        The Behavior ID of this queue.
+        :return: Behavior ID associated with the queue.
+        """
+        return self._behavior_id
+
+    def qsize(self) -> int:
+        """
+        Returns the approximate size of the queue. Note that values may differ
+        depending on the underlying queue implementation.
+        """
+        return self._queue.qsize()
+
+    def empty(self) -> bool:
+        return self._queue.empty()
+
+    def get_nowait(self) -> T:
+        """
+        Gets the next item from the queue, throwing an AgentManagerQueue.Empty exception
+        if the queue is empty.
+        """
+        try:
+            return self._queue.get_nowait()
+        except queue.Empty:
+            raise self.Empty("The AgentManagerQueue is empty.")
+
+    def put(self, item: T) -> None:
+        self._queue.put(item)
+
+
+class AgentManager(AgentProcessor):
+    """
+    An AgentManager is an AgentProcessor that also holds a single trajectory and policy queue.
+    Note: this leaves room for adding AgentProcessors that publish multiple trajectory queues.
+    """
+
+    def __init__(
+        self,
+        policy: Policy,
+        behavior_id: str,
+        stats_reporter: StatsReporter,
+        max_trajectory_length: int = sys.maxsize,
+        threaded: bool = True,
+    ):
+        super().__init__(policy, behavior_id, stats_reporter, max_trajectory_length)
+        trajectory_queue_len = 20 if threaded else 0
+        self.trajectory_queue: AgentManagerQueue[Trajectory] = AgentManagerQueue(
+            self._behavior_id, maxlen=trajectory_queue_len
+        )
+        # NOTE: we make policy queues of infinite length to avoid lockups of the trainers.
+        # In the environment manager, we make sure to empty the policy queue before continuing to produce steps.
+        self.policy_queue: AgentManagerQueue[Policy] = AgentManagerQueue(
+            self._behavior_id, maxlen=0
+        )
+        self.publish_trajectory_queue(self.trajectory_queue)
+
+    def record_environment_stats(
+        self, env_stats: EnvironmentStats, worker_id: int
+    ) -> None:
+        """
+        Pass stats from the environment to the StatsReporter.
+        Depending on the StatsAggregationMethod, either StatsReporter.add_stat or StatsReporter.set_stat is used.
+        The worker_id is used to determine whether StatsReporter.set_stat should be used.
+
+        :param env_stats:
+        :param worker_id:
+        :return:
+        """
+        for stat_name, value_list in env_stats.items():
+            for val, agg_type in value_list:
+                if agg_type == StatsAggregationMethod.AVERAGE:
+                    self._stats_reporter.add_stat(stat_name, val, agg_type)
+                elif agg_type == StatsAggregationMethod.SUM:
+                    self._stats_reporter.add_stat(stat_name, val, agg_type)
+                elif agg_type == StatsAggregationMethod.HISTOGRAM:
+                    self._stats_reporter.add_stat(stat_name, val, agg_type)
+                elif agg_type == StatsAggregationMethod.MOST_RECENT:
+                    # In order to prevent conflicts between multiple environments,
+                    # only stats from the first environment are recorded.
+                    if worker_id == 0:
+                        self._stats_reporter.set_stat(stat_name, val)
+                else:
+                    raise UnityTrainerException(
+                        f"Unknown StatsAggregationMethod encountered. {agg_type}"
+                    )
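Note: AgentManagerQueue above is a thin wrapper around queue.Queue used to hand trajectories from the AgentProcessor to the trainer. A minimal sketch of the producer/consumer pattern it supports (the behavior id string and queued item are placeholders for illustration):

from mlagents.trainers.agent_processor import AgentManagerQueue

q: AgentManagerQueue[str] = AgentManagerQueue("MyBehavior?team=0", maxlen=20)
q.put("trajectory-placeholder")   # producer side (the AgentProcessor)
try:
    item = q.get_nowait()         # consumer side (the trainer), non-blocking
except AgentManagerQueue.Empty:
    pass                          # queue was empty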
MLPY/Lib/site-packages/mlagents/trainers/behavior_id_utils.py
ADDED
@@ -0,0 +1,64 @@
+from typing import NamedTuple
+from urllib.parse import urlparse, parse_qs
+from mlagents_envs.base_env import AgentId, GroupId
+
+GlobalGroupId = str
+GlobalAgentId = str
+
+
+class BehaviorIdentifiers(NamedTuple):
+    """
+    BehaviorIdentifiers is a named tuple of the identifiers that uniquely distinguish
+    an agent encountered in the trainer_controller. The named tuple consists of the
+    fully qualified behavior name, the name of the brain name (corresponds to trainer
+    in the trainer controller) and the team id. In the future, this can be extended
+    to support further identifiers.
+    """
+
+    behavior_id: str
+    brain_name: str
+    team_id: int
+
+    @staticmethod
+    def from_name_behavior_id(name_behavior_id: str) -> "BehaviorIdentifiers":
+        """
+        Parses a name_behavior_id of the form name?team=0
+        into a BehaviorIdentifiers NamedTuple.
+        This allows you to access the brain name and team id of an agent
+        :param name_behavior_id: String of behavior params in HTTP format.
+        :returns: A BehaviorIdentifiers object.
+        """
+
+        parsed = urlparse(name_behavior_id)
+        name = parsed.path
+        ids = parse_qs(parsed.query)
+        team_id: int = 0
+        if "team" in ids:
+            team_id = int(ids["team"][0])
+        return BehaviorIdentifiers(
+            behavior_id=name_behavior_id, brain_name=name, team_id=team_id
+        )
+
+
+def create_name_behavior_id(name: str, team_id: int) -> str:
+    """
+    Reconstructs fully qualified behavior name from name and team_id
+    :param name: brain name
+    :param team_id: team ID
+    :return: name_behavior_id
+    """
+    return name + "?team=" + str(team_id)
+
+
+def get_global_agent_id(worker_id: int, agent_id: AgentId) -> GlobalAgentId:
+    """
+    Create an agent id that is unique across environment workers using the worker_id.
+    """
+    return f"agent_{worker_id}-{agent_id}"
+
+
+def get_global_group_id(worker_id: int, group_id: GroupId) -> GlobalGroupId:
+    """
+    Create a group id that is unique across environment workers when using the worker_id.
+    """
+    return f"group_{worker_id}-{group_id}"
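A quick round-trip through the helpers above, using an illustrative behavior name: BehaviorIdentifiers.from_name_behavior_id("Striker?team=1") yields brain_name "Striker" and team_id 1, and create_name_behavior_id("Striker", 1) rebuilds "Striker?team=1"; similarly, get_global_agent_id(2, 7) produces "agent_2-7".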
MLPY/Lib/site-packages/mlagents/trainers/buffer.py
ADDED
@@ -0,0 +1,521 @@
+from collections import defaultdict
+from collections.abc import MutableMapping
+import enum
+import itertools
+from typing import BinaryIO, DefaultDict, List, Tuple, Union, Optional
+
+import numpy as np
+import h5py
+
+from mlagents_envs.exception import UnityException
+
+# Elements in the buffer can be np.ndarray, or in the case of teammate obs, actions, rewards,
+# a List of np.ndarray. This is done so that we don't have duplicated np.ndarrays, only references.
+BufferEntry = Union[np.ndarray, List[np.ndarray]]
+
+
+class BufferException(UnityException):
+    """
+    Related to errors with the Buffer.
+    """
+
+    pass
+
+
+class BufferKey(enum.Enum):
+    ACTION_MASK = "action_mask"
+    CONTINUOUS_ACTION = "continuous_action"
+    NEXT_CONT_ACTION = "next_continuous_action"
+    CONTINUOUS_LOG_PROBS = "continuous_log_probs"
+    DISCRETE_ACTION = "discrete_action"
+    NEXT_DISC_ACTION = "next_discrete_action"
+    DISCRETE_LOG_PROBS = "discrete_log_probs"
+    DONE = "done"
+    ENVIRONMENT_REWARDS = "environment_rewards"
+    MASKS = "masks"
+    MEMORY = "memory"
+    CRITIC_MEMORY = "critic_memory"
+    BASELINE_MEMORY = "poca_baseline_memory"
+    PREV_ACTION = "prev_action"
+
+    ADVANTAGES = "advantages"
+    DISCOUNTED_RETURNS = "discounted_returns"
+
+    GROUP_DONES = "group_dones"
+    GROUPMATE_REWARDS = "groupmate_reward"
+    GROUP_REWARD = "group_reward"
+    GROUP_CONTINUOUS_ACTION = "group_continuous_action"
+    GROUP_DISCRETE_ACTION = "group_discrete_aaction"
+    GROUP_NEXT_CONT_ACTION = "group_next_cont_action"
+    GROUP_NEXT_DISC_ACTION = "group_next_disc_action"
+
+
+class ObservationKeyPrefix(enum.Enum):
+    OBSERVATION = "obs"
+    NEXT_OBSERVATION = "next_obs"
+
+    GROUP_OBSERVATION = "group_obs"
+    NEXT_GROUP_OBSERVATION = "next_group_obs"
+
+
+class RewardSignalKeyPrefix(enum.Enum):
+    # Reward signals
+    REWARDS = "rewards"
+    VALUE_ESTIMATES = "value_estimates"
+    RETURNS = "returns"
+    ADVANTAGE = "advantage"
+    BASELINES = "baselines"
+
+
+AgentBufferKey = Union[
+    BufferKey, Tuple[ObservationKeyPrefix, int], Tuple[RewardSignalKeyPrefix, str]
+]
+
+
+class RewardSignalUtil:
+    @staticmethod
+    def rewards_key(name: str) -> AgentBufferKey:
+        return RewardSignalKeyPrefix.REWARDS, name
+
+    @staticmethod
+    def value_estimates_key(name: str) -> AgentBufferKey:
+        return RewardSignalKeyPrefix.RETURNS, name
+
+    @staticmethod
+    def returns_key(name: str) -> AgentBufferKey:
+        return RewardSignalKeyPrefix.RETURNS, name
+
+    @staticmethod
+    def advantage_key(name: str) -> AgentBufferKey:
+        return RewardSignalKeyPrefix.ADVANTAGE, name
+
+    @staticmethod
+    def baseline_estimates_key(name: str) -> AgentBufferKey:
+        return RewardSignalKeyPrefix.BASELINES, name
+
+
+class AgentBufferField(list):
+    """
+    AgentBufferField is a list of numpy arrays, or List[np.ndarray] for group entries.
+    When an agent collects a field, you can add it to its AgentBufferField with the append method.
+    """
+
+    def __init__(self, *args, **kwargs):
+        self.padding_value = 0
+        super().__init__(*args, **kwargs)
+
+    def __str__(self) -> str:
+        return f"AgentBufferField: {super().__str__()}"
+
+    def __getitem__(self, index):
+        return_data = super().__getitem__(index)
+        if isinstance(return_data, list):
+            return AgentBufferField(return_data)
+        else:
+            return return_data
+
+    @property
+    def contains_lists(self) -> bool:
+        """
+        Checks whether this AgentBufferField contains List[np.ndarray].
+        """
+        return len(self) > 0 and isinstance(self[0], list)
+
+    def append(self, element: BufferEntry, padding_value: float = 0.0) -> None:
+        """
+        Adds an element to this list. Also lets you change the padding
+        type, so that it can be set on append (e.g. action_masks should
+        be padded with 1.)
+        :param element: The element to append to the list.
+        :param padding_value: The value used to pad when get_batch is called.
+        """
+        super().append(element)
+        self.padding_value = padding_value
+
+    def set(self, data: List[BufferEntry]) -> None:
+        """
+        Sets the list of BufferEntry to the input data
+        :param data: The BufferEntry list to be set.
+        """
+        self[:] = data
+
+    def get_batch(
+        self,
+        batch_size: int = None,
+        training_length: Optional[int] = 1,
+        sequential: bool = True,
+    ) -> List[BufferEntry]:
+        """
+        Retrieve the last batch_size elements of length training_length
+        from the list of np.array
+        :param batch_size: The number of elements to retrieve. If None:
+        All elements will be retrieved.
+        :param training_length: The length of the sequence to be retrieved. If
+        None: only takes one element.
+        :param sequential: If true and training_length is not None: the elements
+        will not repeat in the sequence. [a,b,c,d,e] with training_length = 2 and
+        sequential=True gives [[0,a],[b,c],[d,e]]. If sequential=False gives
+        [[a,b],[b,c],[c,d],[d,e]]
+        """
+        if training_length is None:
+            training_length = 1
+        if sequential:
+            # The sequences will not have overlapping elements (this involves padding)
+            leftover = len(self) % training_length
+            # leftover is the number of elements in the first sequence (this sequence might need 0 padding)
+            if batch_size is None:
+                # retrieve the maximum number of elements
+                batch_size = len(self) // training_length + 1 * (leftover != 0)
+            # The maximum number of sequences taken from a list of length len(self) without overlapping
+            # with padding is equal to batch_size
+            if batch_size > (len(self) // training_length + 1 * (leftover != 0)):
+                raise BufferException(
+                    "The batch size and training length requested for get_batch where"
+                    " too large given the current number of data points."
+                )
+            if batch_size * training_length > len(self):
+                if self.contains_lists:
+                    padding = []
+                else:
+                    # We want to duplicate the last value in the array, multiplied by the padding_value.
+                    padding = np.array(self[-1], dtype=np.float32) * self.padding_value
+                return self[:] + [padding] * (training_length - leftover)
+
+            else:
+                return self[len(self) - batch_size * training_length :]
+        else:
+            # The sequences will have overlapping elements
+            if batch_size is None:
+                # retrieve the maximum number of elements
+                batch_size = len(self) - training_length + 1
+            # The number of sequences of length training_length taken from a list of len(self) elements
+            # with overlapping is equal to batch_size
+            if (len(self) - training_length + 1) < batch_size:
+                raise BufferException(
+                    "The batch size and training length requested for get_batch where"
+                    " too large given the current number of data points."
+                )
+            tmp_list: List[np.ndarray] = []
+            for end in range(len(self) - batch_size + 1, len(self) + 1):
+                tmp_list += self[end - training_length : end]
+            return tmp_list
+
+    def reset_field(self) -> None:
+        """
+        Resets the AgentBufferField
+        """
+        self[:] = []
+
+    def padded_to_batch(
+        self, pad_value: np.float = 0, dtype: np.dtype = np.float32
+    ) -> Union[np.ndarray, List[np.ndarray]]:
+        """
+        Converts this AgentBufferField (which is a List[BufferEntry]) into a numpy array
+        with first dimension equal to the length of this AgentBufferField. If this AgentBufferField
+        contains a List[List[BufferEntry]] (i.e., in the case of group observations), return a List
+        containing numpy arrays or tensors, of length equal to the maximum length of an entry. Missing
+        For entries with less than that length, the array will be padded with pad_value.
+        :param pad_value: Value to pad List AgentBufferFields, when there are less than the maximum
+        number of agents present.
+        :param dtype: Dtype of output numpy array.
+        :return: Numpy array or List of numpy arrays representing this AgentBufferField, where the first
+        dimension is equal to the length of the AgentBufferField.
+        """
+        if len(self) > 0 and not isinstance(self[0], list):
+            return np.asanyarray(self, dtype=dtype)
+
+        shape = None
+        for _entry in self:
+            # _entry could be an empty list if there are no group agents in this
+            # step. Find the first non-empty list and use that shape.
+            if _entry:
+                shape = _entry[0].shape
+                break
+        # If there were no groupmate agents in the entire batch, return an empty List.
+        if shape is None:
+            return []
+
+        # Convert to numpy array while padding with 0's
+        new_list = list(
+            map(
+                lambda x: np.asanyarray(x, dtype=dtype),
+                itertools.zip_longest(*self, fillvalue=np.full(shape, pad_value)),
+            )
+        )
+        return new_list
+
+    def to_ndarray(self):
+        """
+        Returns the AgentBufferField which is a list of numpy ndarrays (or List[np.ndarray]) as an ndarray.
|
250 |
+
"""
|
251 |
+
return np.array(self)
|
252 |
+
|
253 |
+
|
254 |
+
class AgentBuffer(MutableMapping):
|
255 |
+
"""
|
256 |
+
AgentBuffer contains a dictionary of AgentBufferFields. Each agent has his own AgentBuffer.
|
257 |
+
The keys correspond to the name of the field. Example: state, action
|
258 |
+
"""
|
259 |
+
|
260 |
+
# Whether or not to validate the types of keys at runtime
|
261 |
+
# This should be off for training, but enabled for testing
|
262 |
+
CHECK_KEY_TYPES_AT_RUNTIME = False
|
263 |
+
|
264 |
+
def __init__(self):
|
265 |
+
self.last_brain_info = None
|
266 |
+
self.last_take_action_outputs = None
|
267 |
+
self._fields: DefaultDict[AgentBufferKey, AgentBufferField] = defaultdict(
|
268 |
+
AgentBufferField
|
269 |
+
)
|
270 |
+
|
271 |
+
def __str__(self):
|
272 |
+
return ", ".join([f"'{k}' : {str(self[k])}" for k in self._fields.keys()])
|
273 |
+
|
274 |
+
def reset_agent(self) -> None:
|
275 |
+
"""
|
276 |
+
Resets the AgentBuffer
|
277 |
+
"""
|
278 |
+
for f in self._fields.values():
|
279 |
+
f.reset_field()
|
280 |
+
self.last_brain_info = None
|
281 |
+
self.last_take_action_outputs = None
|
282 |
+
|
283 |
+
@staticmethod
|
284 |
+
def _check_key(key):
|
285 |
+
if isinstance(key, BufferKey):
|
286 |
+
return
|
287 |
+
if isinstance(key, tuple):
|
288 |
+
key0, key1 = key
|
289 |
+
if isinstance(key0, ObservationKeyPrefix):
|
290 |
+
if isinstance(key1, int):
|
291 |
+
return
|
292 |
+
raise KeyError(f"{key} has type ({type(key0)}, {type(key1)})")
|
293 |
+
if isinstance(key0, RewardSignalKeyPrefix):
|
294 |
+
if isinstance(key1, str):
|
295 |
+
return
|
296 |
+
raise KeyError(f"{key} has type ({type(key0)}, {type(key1)})")
|
297 |
+
raise KeyError(f"{key} is a {type(key)}")
|
298 |
+
|
299 |
+
@staticmethod
|
300 |
+
def _encode_key(key: AgentBufferKey) -> str:
|
301 |
+
"""
|
302 |
+
Convert the key to a string representation so that it can be used for serialization.
|
303 |
+
"""
|
304 |
+
if isinstance(key, BufferKey):
|
305 |
+
return key.value
|
306 |
+
prefix, suffix = key
|
307 |
+
return f"{prefix.value}:{suffix}"
|
308 |
+
|
309 |
+
@staticmethod
|
310 |
+
def _decode_key(encoded_key: str) -> AgentBufferKey:
|
311 |
+
"""
|
312 |
+
Convert the string representation back to a key after serialization.
|
313 |
+
"""
|
314 |
+
# Simple case: convert the string directly to a BufferKey
|
315 |
+
try:
|
316 |
+
return BufferKey(encoded_key)
|
317 |
+
except ValueError:
|
318 |
+
pass
|
319 |
+
|
320 |
+
# Not a simple key, so split into two parts
|
321 |
+
prefix_str, _, suffix_str = encoded_key.partition(":")
|
322 |
+
|
323 |
+
# See if it's an ObservationKeyPrefix first
|
324 |
+
try:
|
325 |
+
return ObservationKeyPrefix(prefix_str), int(suffix_str)
|
326 |
+
except ValueError:
|
327 |
+
pass
|
328 |
+
|
329 |
+
# If not, it had better be a RewardSignalKeyPrefix
|
330 |
+
try:
|
331 |
+
return RewardSignalKeyPrefix(prefix_str), suffix_str
|
332 |
+
except ValueError:
|
333 |
+
raise ValueError(f"Unable to convert {encoded_key} to an AgentBufferKey")
|
334 |
+
|
335 |
+
def __getitem__(self, key: AgentBufferKey) -> AgentBufferField:
|
336 |
+
if self.CHECK_KEY_TYPES_AT_RUNTIME:
|
337 |
+
self._check_key(key)
|
338 |
+
return self._fields[key]
|
339 |
+
|
340 |
+
def __setitem__(self, key: AgentBufferKey, value: AgentBufferField) -> None:
|
341 |
+
if self.CHECK_KEY_TYPES_AT_RUNTIME:
|
342 |
+
self._check_key(key)
|
343 |
+
self._fields[key] = value
|
344 |
+
|
345 |
+
def __delitem__(self, key: AgentBufferKey) -> None:
|
346 |
+
if self.CHECK_KEY_TYPES_AT_RUNTIME:
|
347 |
+
self._check_key(key)
|
348 |
+
self._fields.__delitem__(key)
|
349 |
+
|
350 |
+
def __iter__(self):
|
351 |
+
return self._fields.__iter__()
|
352 |
+
|
353 |
+
def __len__(self) -> int:
|
354 |
+
return self._fields.__len__()
|
355 |
+
|
356 |
+
def __contains__(self, key):
|
357 |
+
if self.CHECK_KEY_TYPES_AT_RUNTIME:
|
358 |
+
self._check_key(key)
|
359 |
+
return self._fields.__contains__(key)
|
360 |
+
|
361 |
+
def check_length(self, key_list: List[AgentBufferKey]) -> bool:
|
362 |
+
"""
|
363 |
+
Some methods will require that some fields have the same length.
|
364 |
+
check_length will return true if the fields in key_list
|
365 |
+
have the same length.
|
366 |
+
:param key_list: The fields which length will be compared
|
367 |
+
"""
|
368 |
+
if self.CHECK_KEY_TYPES_AT_RUNTIME:
|
369 |
+
for k in key_list:
|
370 |
+
self._check_key(k)
|
371 |
+
|
372 |
+
if len(key_list) < 2:
|
373 |
+
return True
|
374 |
+
length = None
|
375 |
+
for key in key_list:
|
376 |
+
if key not in self._fields:
|
377 |
+
return False
|
378 |
+
if (length is not None) and (length != len(self[key])):
|
379 |
+
return False
|
380 |
+
length = len(self[key])
|
381 |
+
return True
|
382 |
+
|
383 |
+
def shuffle(
|
384 |
+
self, sequence_length: int, key_list: List[AgentBufferKey] = None
|
385 |
+
) -> None:
|
386 |
+
"""
|
387 |
+
Shuffles the fields in key_list in a consistent way: The reordering will
|
388 |
+
be the same across fields.
|
389 |
+
:param key_list: The fields that must be shuffled.
|
390 |
+
"""
|
391 |
+
if key_list is None:
|
392 |
+
key_list = list(self._fields.keys())
|
393 |
+
if not self.check_length(key_list):
|
394 |
+
raise BufferException(
|
395 |
+
"Unable to shuffle if the fields are not of same length"
|
396 |
+
)
|
397 |
+
s = np.arange(len(self[key_list[0]]) // sequence_length)
|
398 |
+
np.random.shuffle(s)
|
399 |
+
for key in key_list:
|
400 |
+
buffer_field = self[key]
|
401 |
+
tmp: List[np.ndarray] = []
|
402 |
+
for i in s:
|
403 |
+
tmp += buffer_field[i * sequence_length : (i + 1) * sequence_length]
|
404 |
+
buffer_field.set(tmp)
|
405 |
+
|
406 |
+
def make_mini_batch(self, start: int, end: int) -> "AgentBuffer":
|
407 |
+
"""
|
408 |
+
Creates a mini-batch from buffer.
|
409 |
+
:param start: Starting index of buffer.
|
410 |
+
:param end: Ending index of buffer.
|
411 |
+
:return: Dict of mini batch.
|
412 |
+
"""
|
413 |
+
mini_batch = AgentBuffer()
|
414 |
+
for key, field in self._fields.items():
|
415 |
+
# slicing AgentBufferField returns a List[Any}
|
416 |
+
mini_batch[key] = field[start:end] # type: ignore
|
417 |
+
return mini_batch
|
418 |
+
|
419 |
+
def sample_mini_batch(
|
420 |
+
self, batch_size: int, sequence_length: int = 1
|
421 |
+
) -> "AgentBuffer":
|
422 |
+
"""
|
423 |
+
Creates a mini-batch from a random start and end.
|
424 |
+
:param batch_size: number of elements to withdraw.
|
425 |
+
:param sequence_length: Length of sequences to sample.
|
426 |
+
Number of sequences to sample will be batch_size/sequence_length.
|
427 |
+
"""
|
428 |
+
num_seq_to_sample = batch_size // sequence_length
|
429 |
+
mini_batch = AgentBuffer()
|
430 |
+
buff_len = self.num_experiences
|
431 |
+
num_sequences_in_buffer = buff_len // sequence_length
|
432 |
+
start_idxes = (
|
433 |
+
np.random.randint(num_sequences_in_buffer, size=num_seq_to_sample)
|
434 |
+
* sequence_length
|
435 |
+
) # Sample random sequence starts
|
436 |
+
for key in self:
|
437 |
+
buffer_field = self[key]
|
438 |
+
mb_list = (buffer_field[i : i + sequence_length] for i in start_idxes)
|
439 |
+
# See comparison of ways to make a list from a list of lists here:
|
440 |
+
# https://stackoverflow.com/questions/952914/how-to-make-a-flat-list-out-of-list-of-lists
|
441 |
+
mini_batch[key].set(list(itertools.chain.from_iterable(mb_list)))
|
442 |
+
return mini_batch
|
443 |
+
|
444 |
+
def save_to_file(self, file_object: BinaryIO) -> None:
|
445 |
+
"""
|
446 |
+
Saves the AgentBuffer to a file-like object.
|
447 |
+
"""
|
448 |
+
with h5py.File(file_object, "w") as write_file:
|
449 |
+
for key, data in self.items():
|
450 |
+
write_file.create_dataset(
|
451 |
+
self._encode_key(key), data=data, dtype="f", compression="gzip"
|
452 |
+
)
|
453 |
+
|
454 |
+
def load_from_file(self, file_object: BinaryIO) -> None:
|
455 |
+
"""
|
456 |
+
Loads the AgentBuffer from a file-like object.
|
457 |
+
"""
|
458 |
+
with h5py.File(file_object, "r") as read_file:
|
459 |
+
for key in list(read_file.keys()):
|
460 |
+
decoded_key = self._decode_key(key)
|
461 |
+
self[decoded_key] = AgentBufferField()
|
462 |
+
# extend() will convert the numpy array's first dimension into list
|
463 |
+
self[decoded_key].extend(read_file[key][()])
|
464 |
+
|
465 |
+
def truncate(self, max_length: int, sequence_length: int = 1) -> None:
|
466 |
+
"""
|
467 |
+
Truncates the buffer to a certain length.
|
468 |
+
|
469 |
+
This can be slow for large buffers. We compensate by cutting further than we need to, so that
|
470 |
+
we're not truncating at each update. Note that we must truncate an integer number of sequence_lengths
|
471 |
+
param: max_length: The length at which to truncate the buffer.
|
472 |
+
"""
|
473 |
+
current_length = self.num_experiences
|
474 |
+
# make max_length an integer number of sequence_lengths
|
475 |
+
max_length -= max_length % sequence_length
|
476 |
+
if current_length > max_length:
|
477 |
+
for _key in self.keys():
|
478 |
+
self[_key][:] = self[_key][current_length - max_length :]
|
479 |
+
|
480 |
+
def resequence_and_append(
|
481 |
+
self,
|
482 |
+
target_buffer: "AgentBuffer",
|
483 |
+
key_list: List[AgentBufferKey] = None,
|
484 |
+
batch_size: int = None,
|
485 |
+
training_length: int = None,
|
486 |
+
) -> None:
|
487 |
+
"""
|
488 |
+
Takes in a batch size and training length (sequence length), and appends this AgentBuffer to target_buffer
|
489 |
+
properly padded for LSTM use. Optionally, use key_list to restrict which fields are inserted into the new
|
490 |
+
buffer.
|
491 |
+
:param target_buffer: The buffer which to append the samples to.
|
492 |
+
:param key_list: The fields that must be added. If None: all fields will be appended.
|
493 |
+
:param batch_size: The number of elements that must be appended. If None: All of them will be.
|
494 |
+
:param training_length: The length of the samples that must be appended. If None: only takes one element.
|
495 |
+
"""
|
496 |
+
if key_list is None:
|
497 |
+
key_list = list(self.keys())
|
498 |
+
if not self.check_length(key_list):
|
499 |
+
raise BufferException(
|
500 |
+
f"The length of the fields {key_list} were not of same length"
|
501 |
+
)
|
502 |
+
for field_key in key_list:
|
503 |
+
target_buffer[field_key].extend(
|
504 |
+
self[field_key].get_batch(
|
505 |
+
batch_size=batch_size, training_length=training_length
|
506 |
+
)
|
507 |
+
)
|
508 |
+
|
509 |
+
@property
|
510 |
+
def num_experiences(self) -> int:
|
511 |
+
"""
|
512 |
+
The number of agent experiences in the AgentBuffer, i.e. the length of the buffer.
|
513 |
+
|
514 |
+
An experience consists of one element across all of the fields of this AgentBuffer.
|
515 |
+
Note that these all have to be the same length, otherwise shuffle and append_to_update_buffer
|
516 |
+
will fail.
|
517 |
+
"""
|
518 |
+
if self.values():
|
519 |
+
return len(next(iter(self.values())))
|
520 |
+
else:
|
521 |
+
return 0
|
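
As a rough illustration of how these buffer classes fit together (a minimal sketch, not part of the uploaded files; the field choices and sizes below are arbitrary), an AgentBuffer can be filled field by field and then resampled into mini-batches:

import numpy as np
from mlagents.trainers.buffer import AgentBuffer, BufferKey

# Hypothetical usage sketch: fill a buffer with 8 fake experiences.
buffer = AgentBuffer()
for step in range(8):
    buffer[BufferKey.ENVIRONMENT_REWARDS].append(np.float32(1.0))
    buffer[BufferKey.DONE].append(np.float32(step == 7))

print(buffer.num_experiences)  # 8
# Shuffle all fields consistently, then draw a mini-batch of 4 experiences.
buffer.shuffle(sequence_length=1)
mini = buffer.sample_mini_batch(batch_size=4, sequence_length=1)
print(mini.num_experiences)  # 4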
MLPY/Lib/site-packages/mlagents/trainers/cli_utils.py
ADDED
@@ -0,0 +1,331 @@
from typing import Set, Dict, Any, TextIO
import os
import yaml
from mlagents.trainers.exception import TrainerConfigError
from mlagents_envs.environment import UnityEnvironment
import argparse
from mlagents_envs import logging_util

logger = logging_util.get_logger(__name__)


class RaiseRemovedWarning(argparse.Action):
    """
    Internal custom Action to raise warning when argument is called.
    """

    def __init__(self, nargs=0, **kwargs):
        super().__init__(nargs=nargs, **kwargs)

    def __call__(self, arg_parser, namespace, values, option_string=None):
        logger.warning(f"The command line argument {option_string} was removed.")


class DetectDefault(argparse.Action):
    """
    Internal custom Action to help detect arguments that aren't default.
    """

    non_default_args: Set[str] = set()

    def __call__(self, arg_parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, values)
        DetectDefault.non_default_args.add(self.dest)


class DetectDefaultStoreTrue(DetectDefault):
    """
    Internal class to help detect arguments that aren't default.
    Used for store_true arguments.
    """

    def __init__(self, nargs=0, **kwargs):
        super().__init__(nargs=nargs, **kwargs)

    def __call__(self, arg_parser, namespace, values, option_string=None):
        super().__call__(arg_parser, namespace, True, option_string)


class StoreConfigFile(argparse.Action):
    """
    Custom Action to store the config file location not as part of the CLI args.
    This is because we want to maintain an equivalence between the config file's
    contents and the args themselves.
    """

    trainer_config_path: str

    def __call__(self, arg_parser, namespace, values, option_string=None):
        delattr(namespace, self.dest)
        StoreConfigFile.trainer_config_path = values


def _create_parser() -> argparse.ArgumentParser:
    argparser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    argparser.add_argument(
        "trainer_config_path", action=StoreConfigFile, nargs="?", default=None
    )
    argparser.add_argument(
        "--env",
        default=None,
        dest="env_path",
        help="Path to the Unity executable to train",
        action=DetectDefault,
    )
    argparser.add_argument(
        "--load",
        default=False,
        dest="load_model",
        action=DetectDefaultStoreTrue,
        help=argparse.SUPPRESS,  # Deprecated but still usable for now.
    )
    argparser.add_argument(
        "--resume",
        default=False,
        dest="resume",
        action=DetectDefaultStoreTrue,
        help="Whether to resume training from a checkpoint. Specify a --run-id to use this option. "
        "If set, the training code loads an already trained model to initialize the neural network "
        "before resuming training. This option is only valid when the models exist, and have the same "
        "behavior names as the current agents in your scene.",
    )
    argparser.add_argument(
        "--deterministic",
        default=False,
        dest="deterministic",
        action=DetectDefaultStoreTrue,
        help="Whether to select actions deterministically in policy. `dist.mean` for continuous action "
        "space, and `dist.argmax` for deterministic action space ",
    )
    argparser.add_argument(
        "--force",
        default=False,
        dest="force",
        action=DetectDefaultStoreTrue,
        help="Whether to force-overwrite this run-id's existing summary and model data. (Without "
        "this flag, attempting to train a model with a run-id that has been used before will throw "
        "an error.)",
    )
    argparser.add_argument(
        "--run-id",
        default="ppo",
        help="The identifier for the training run. This identifier is used to name the "
        "subdirectories in which the trained model and summary statistics are saved as well "
        "as the saved model itself. If you use TensorBoard to view the training statistics, "
        "always set a unique run-id for each training run. (The statistics for all runs with the "
        "same id are combined as if they were produced by the same session.)",
        action=DetectDefault,
    )
    argparser.add_argument(
        "--initialize-from",
        metavar="RUN_ID",
        default=None,
        help="Specify a previously saved run ID from which to initialize the model. "
        "This can be used, for instance, to fine-tune an existing model on a new environment. "
        "Note that the previously saved models must have the same behavior parameters as your "
        "current environment.",
        action=DetectDefault,
    )
    argparser.add_argument(
        "--seed",
        default=-1,
        type=int,
        help="A number to use as a seed for the random number generator used by the training code",
        action=DetectDefault,
    )
    argparser.add_argument(
        "--train",
        default=False,
        dest="train_model",
        action=DetectDefaultStoreTrue,
        help=argparse.SUPPRESS,
    )
    argparser.add_argument(
        "--inference",
        default=False,
        dest="inference",
        action=DetectDefaultStoreTrue,
        help="Whether to run in Python inference mode (i.e. no training). Use with --resume to load "
        "a model trained with an existing run ID.",
    )
    argparser.add_argument(
        "--base-port",
        default=UnityEnvironment.BASE_ENVIRONMENT_PORT,
        type=int,
        help="The starting port for environment communication. Each concurrent Unity environment "
        "instance will get assigned a port sequentially, starting from the base-port. Each instance "
        "will use the port (base_port + worker_id), where the worker_id is sequential IDs given to "
        "each instance from 0 to (num_envs - 1). Note that when training using the Editor rather "
        "than an executable, the base port will be ignored.",
        action=DetectDefault,
    )
    argparser.add_argument(
        "--num-envs",
        default=1,
        type=int,
        help="The number of concurrent Unity environment instances to collect experiences "
        "from when training",
        action=DetectDefault,
    )

    argparser.add_argument(
        "--num-areas",
        default=1,
        type=int,
        help="The number of parallel training areas in each Unity environment instance.",
        action=DetectDefault,
    )

    argparser.add_argument(
        "--debug",
        default=False,
        action=DetectDefaultStoreTrue,
        help="Whether to enable debug-level logging for some parts of the code",
    )
    argparser.add_argument(
        "--env-args",
        default=None,
        nargs=argparse.REMAINDER,
        help="Arguments passed to the Unity executable. Be aware that the standalone build will also "
        "process these as Unity Command Line Arguments. You should choose different argument names if "
        "you want to create environment-specific arguments. All arguments after this flag will be "
        "passed to the executable.",
        action=DetectDefault,
    )
    argparser.add_argument(
        "--max-lifetime-restarts",
        default=10,
        help="The max number of times a single Unity executable can crash over its lifetime before ml-agents exits. "
        "Can be set to -1 if no limit is desired.",
        action=DetectDefault,
    )
    argparser.add_argument(
        "--restarts-rate-limit-n",
        default=1,
        help="The maximum number of times a single Unity executable can crash over a period of time (period set in "
        "restarts-rate-limit-period-s). Can be set to -1 to not use rate limiting with restarts.",
        action=DetectDefault,
    )
    argparser.add_argument(
        "--restarts-rate-limit-period-s",
        default=60,
        help="The period of time --restarts-rate-limit-n applies to.",
        action=DetectDefault,
    )
    argparser.add_argument(
        "--torch",
        default=False,
        action=RaiseRemovedWarning,
        help="(Removed) Use the PyTorch framework.",
    )
    argparser.add_argument(
        "--tensorflow",
        default=False,
        action=RaiseRemovedWarning,
        help="(Removed) Use the TensorFlow framework.",
    )
    argparser.add_argument(
        "--results-dir",
        default="results",
        action=DetectDefault,
        help="Results base directory",
    )

    eng_conf = argparser.add_argument_group(title="Engine Configuration")
    eng_conf.add_argument(
        "--width",
        default=84,
        type=int,
        help="The width of the executable window of the environment(s) in pixels "
        "(ignored for editor training).",
        action=DetectDefault,
    )
    eng_conf.add_argument(
        "--height",
        default=84,
        type=int,
        help="The height of the executable window of the environment(s) in pixels "
        "(ignored for editor training)",
        action=DetectDefault,
    )
    eng_conf.add_argument(
        "--quality-level",
        default=5,
        type=int,
        help="The quality level of the environment(s). Equivalent to calling "
        "QualitySettings.SetQualityLevel in Unity.",
        action=DetectDefault,
    )
    eng_conf.add_argument(
        "--time-scale",
        default=20,
        type=float,
        help="The time scale of the Unity environment(s). Equivalent to setting "
        "Time.timeScale in Unity.",
        action=DetectDefault,
    )
    eng_conf.add_argument(
        "--target-frame-rate",
        default=-1,
        type=int,
        help="The target frame rate of the Unity environment(s). Equivalent to setting "
        "Application.targetFrameRate in Unity.",
        action=DetectDefault,
    )
    eng_conf.add_argument(
        "--capture-frame-rate",
        default=60,
        type=int,
        help="The capture frame rate of the Unity environment(s). Equivalent to setting "
        "Time.captureFramerate in Unity.",
        action=DetectDefault,
    )
    eng_conf.add_argument(
        "--no-graphics",
        default=False,
        action=DetectDefaultStoreTrue,
        help="Whether to run the Unity executable in no-graphics mode (i.e. without initializing "
        "the graphics driver). Use this only if your agents don't use visual observations.",
    )

    torch_conf = argparser.add_argument_group(title="Torch Configuration")
    torch_conf.add_argument(
        "--torch-device",
        default=None,
        dest="device",
        action=DetectDefault,
        help='Settings for the default torch.device used in training, for example, "cpu", "cuda", or "cuda:0"',
    )
    return argparser


def load_config(config_path: str) -> Dict[str, Any]:
    try:
        with open(config_path) as data_file:
            return _load_config(data_file)
    except OSError:
        abs_path = os.path.abspath(config_path)
        raise TrainerConfigError(f"Config file could not be found at {abs_path}.")
    except UnicodeDecodeError:
        raise TrainerConfigError(
            f"There was an error decoding Config file from {config_path}. "
            f"Make sure your file is saved using UTF-8"
        )


def _load_config(fp: TextIO) -> Dict[str, Any]:
    """
    Load the yaml config from the file-like object.
    """
    try:
        return yaml.safe_load(fp)
    except yaml.parser.ParserError as e:
        raise TrainerConfigError(
            "Error parsing yaml file. Please check for formatting errors. "
            "A tool such as http://www.yamllint.com/ can be helpful with this."
        ) from e


parser = _create_parser()
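
For reference, a minimal sketch of how this module is typically driven (the YAML path and run-id below are hypothetical; this example is not part of the upload):

from mlagents.trainers.cli_utils import parser, load_config, StoreConfigFile, DetectDefault

# Parse a hypothetical command line; the positional config path is captured by
# StoreConfigFile rather than stored on the namespace.
args = parser.parse_args(["config/ppo/3DBall.yaml", "--run-id", "my_run", "--force"])

print(StoreConfigFile.trainer_config_path)  # config/ppo/3DBall.yaml
print(DetectDefault.non_default_args)       # {'run_id', 'force'}

# Load the trainer configuration from that path (raises TrainerConfigError if missing).
config = load_config(StoreConfigFile.trainer_config_path)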
MLPY/Lib/site-packages/mlagents/trainers/demo_loader.py
ADDED
@@ -0,0 +1,246 @@
import os
from typing import List, Tuple
import numpy as np
from mlagents.trainers.buffer import AgentBuffer, BufferKey
from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
    AgentInfoActionPairProto,
)
from mlagents.trainers.trajectory import ObsUtil
from mlagents_envs.rpc_utils import behavior_spec_from_proto, steps_from_proto
from mlagents_envs.base_env import BehaviorSpec
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
    DemonstrationMetaProto,
)
from mlagents_envs.timers import timed, hierarchical_timer
from google.protobuf.internal.decoder import _DecodeVarint32  # type: ignore
from google.protobuf.internal.encoder import _EncodeVarint  # type: ignore


INITIAL_POS = 33
SUPPORTED_DEMONSTRATION_VERSIONS = frozenset([0, 1])


@timed
def make_demo_buffer(
    pair_infos: List[AgentInfoActionPairProto],
    behavior_spec: BehaviorSpec,
    sequence_length: int,
) -> AgentBuffer:
    # Create and populate buffer using experiences
    demo_raw_buffer = AgentBuffer()
    demo_processed_buffer = AgentBuffer()
    for idx, current_pair_info in enumerate(pair_infos):
        if idx > len(pair_infos) - 2:
            break
        next_pair_info = pair_infos[idx + 1]
        current_decision_step, current_terminal_step = steps_from_proto(
            [current_pair_info.agent_info], behavior_spec
        )
        next_decision_step, next_terminal_step = steps_from_proto(
            [next_pair_info.agent_info], behavior_spec
        )
        previous_action = (
            np.array(
                pair_infos[idx].action_info.vector_actions_deprecated, dtype=np.float32
            )
            * 0
        )
        if idx > 0:
            previous_action = np.array(
                pair_infos[idx - 1].action_info.vector_actions_deprecated,
                dtype=np.float32,
            )

        next_done = len(next_terminal_step) == 1
        next_reward = 0
        if len(next_terminal_step) == 1:
            next_reward = next_terminal_step.reward[0]
        else:
            next_reward = next_decision_step.reward[0]
        current_obs = None
        if len(current_terminal_step) == 1:
            current_obs = list(current_terminal_step.values())[0].obs
        else:
            current_obs = list(current_decision_step.values())[0].obs

        demo_raw_buffer[BufferKey.DONE].append(next_done)
        demo_raw_buffer[BufferKey.ENVIRONMENT_REWARDS].append(next_reward)
        for i, obs in enumerate(current_obs):
            demo_raw_buffer[ObsUtil.get_name_at(i)].append(obs)
        if (
            len(current_pair_info.action_info.continuous_actions) == 0
            and len(current_pair_info.action_info.discrete_actions) == 0
        ):
            if behavior_spec.action_spec.continuous_size > 0:
                demo_raw_buffer[BufferKey.CONTINUOUS_ACTION].append(
                    current_pair_info.action_info.vector_actions_deprecated
                )
            else:
                demo_raw_buffer[BufferKey.DISCRETE_ACTION].append(
                    current_pair_info.action_info.vector_actions_deprecated
                )
        else:
            if behavior_spec.action_spec.continuous_size > 0:
                demo_raw_buffer[BufferKey.CONTINUOUS_ACTION].append(
                    current_pair_info.action_info.continuous_actions
                )
            if behavior_spec.action_spec.discrete_size > 0:
                demo_raw_buffer[BufferKey.DISCRETE_ACTION].append(
                    current_pair_info.action_info.discrete_actions
                )
        demo_raw_buffer[BufferKey.PREV_ACTION].append(previous_action)
        if next_done:
            demo_raw_buffer.resequence_and_append(
                demo_processed_buffer, batch_size=None, training_length=sequence_length
            )
            demo_raw_buffer.reset_agent()
    demo_raw_buffer.resequence_and_append(
        demo_processed_buffer, batch_size=None, training_length=sequence_length
    )
    return demo_processed_buffer


@timed
def demo_to_buffer(
    file_path: str, sequence_length: int, expected_behavior_spec: BehaviorSpec = None
) -> Tuple[BehaviorSpec, AgentBuffer]:
    """
    Loads demonstration file and uses it to fill training buffer.
    :param file_path: Location of demonstration file (.demo).
    :param sequence_length: Length of trajectories to fill buffer.
    :return: The BehaviorSpec of the demonstration and the filled AgentBuffer.
    """
    behavior_spec, info_action_pair, _ = load_demonstration(file_path)
    demo_buffer = make_demo_buffer(info_action_pair, behavior_spec, sequence_length)
    if expected_behavior_spec:
        # check action dimensions in demonstration match
        if behavior_spec.action_spec != expected_behavior_spec.action_spec:
            raise RuntimeError(
                "The actions {} in demonstration do not match the policy's {}.".format(
                    behavior_spec.action_spec, expected_behavior_spec.action_spec
                )
            )
        # check observations match
        if len(behavior_spec.observation_specs) != len(
            expected_behavior_spec.observation_specs
        ):
            raise RuntimeError(
                "The demonstrations do not have the same number of observations as the policy."
            )
        else:
            for i, (demo_obs, policy_obs) in enumerate(
                zip(
                    behavior_spec.observation_specs,
                    expected_behavior_spec.observation_specs,
                )
            ):
                if demo_obs.shape != policy_obs.shape:
                    raise RuntimeError(
                        f"The shape {demo_obs} for observation {i} in demonstration \
                        does not match the policy's {policy_obs}."
                    )
    return behavior_spec, demo_buffer


def get_demo_files(path: str) -> List[str]:
    """
    Retrieves the demonstration file(s) from a path.
    :param path: Path of demonstration file or directory.
    :return: List of demonstration files

    Raises errors if |path| is invalid.
    """
    if os.path.isfile(path):
        if not path.endswith(".demo"):
            raise ValueError("The path provided is not a '.demo' file.")
        return [path]
    elif os.path.isdir(path):
        paths = [
            os.path.join(path, name)
            for name in os.listdir(path)
            if name.endswith(".demo")
        ]
        if not paths:
            raise ValueError("There are no '.demo' files in the provided directory.")
        return paths
    else:
        raise FileNotFoundError(
            f"The demonstration file or directory {path} does not exist."
        )


@timed
def load_demonstration(
    file_path: str,
) -> Tuple[BehaviorSpec, List[AgentInfoActionPairProto], int]:
    """
    Loads and parses a demonstration file.
    :param file_path: Location of demonstration file (.demo).
    :return: BrainParameter and list of AgentInfoActionPairProto containing demonstration data.
    """

    # First 32 bytes of file dedicated to meta-data.
    file_paths = get_demo_files(file_path)
    behavior_spec = None
    brain_param_proto = None
    info_action_pairs = []
    total_expected = 0
    for _file_path in file_paths:
        with open(_file_path, "rb") as fp:
            with hierarchical_timer("read_file"):
                data = fp.read()
            next_pos, pos, obs_decoded = 0, 0, 0
            while pos < len(data):
                next_pos, pos = _DecodeVarint32(data, pos)
                if obs_decoded == 0:
                    meta_data_proto = DemonstrationMetaProto()
                    meta_data_proto.ParseFromString(data[pos : pos + next_pos])
                    if (
                        meta_data_proto.api_version
                        not in SUPPORTED_DEMONSTRATION_VERSIONS
                    ):
                        raise RuntimeError(
                            f"Can't load Demonstration data from an unsupported version ({meta_data_proto.api_version})"
                        )
                    total_expected += meta_data_proto.number_steps
                    pos = INITIAL_POS
                if obs_decoded == 1:
                    brain_param_proto = BrainParametersProto()
                    brain_param_proto.ParseFromString(data[pos : pos + next_pos])
                    pos += next_pos
                if obs_decoded > 1:
                    agent_info_action = AgentInfoActionPairProto()
                    agent_info_action.ParseFromString(data[pos : pos + next_pos])
                    if behavior_spec is None:
                        behavior_spec = behavior_spec_from_proto(
                            brain_param_proto, agent_info_action.agent_info
                        )
                    info_action_pairs.append(agent_info_action)
                    if len(info_action_pairs) == total_expected:
                        break
                    pos += next_pos
                obs_decoded += 1
    if not behavior_spec:
        raise RuntimeError(
            f"No BrainParameters found in demonstration file at {file_path}."
        )
    return behavior_spec, info_action_pairs, total_expected


def write_delimited(f, message):
    msg_string = message.SerializeToString()
    msg_size = len(msg_string)
    _EncodeVarint(f.write, msg_size)
    f.write(msg_string)


def write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos):
    with open(demo_path, "wb") as f:
        # write metadata
        write_delimited(f, meta_data_proto)
        f.seek(INITIAL_POS)
        write_delimited(f, brain_param_proto)

        for agent in agent_info_protos:
            write_delimited(f, agent)
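
A short usage sketch for this loader (the .demo path below is hypothetical and the example is not part of the upload):

from mlagents.trainers.demo_loader import demo_to_buffer

# Hypothetical recording; sequence_length=1 keeps the demonstration unsequenced.
behavior_spec, demo_buffer = demo_to_buffer("Demos/3DBall.demo", sequence_length=1)

print(behavior_spec.action_spec)       # action layout recorded in the demo
print(demo_buffer.num_experiences)     # number of (obs, action, reward) steps loaded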
MLPY/Lib/site-packages/mlagents/trainers/directory_utils.py
ADDED
@@ -0,0 +1,76 @@
import os
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.model_saver.torch_model_saver import DEFAULT_CHECKPOINT_NAME


def validate_existing_directories(
    output_path: str, resume: bool, force: bool, init_path: str = None
) -> None:
    """
    Validates that if the run_id model exists, we do not overwrite it unless --force is specified.
    Throws an exception if resume isn't specified and run_id exists. Throws an exception
    if --resume is specified and run-id was not found.
    :param output_path: The output path for this run ID.
    :param resume: Whether or not the --resume flag was passed.
    :param force: Whether or not the --force flag was passed.
    :param init_path: Path to run-id dir to initialize from
    """

    output_path_exists = os.path.isdir(output_path)

    if output_path_exists:
        if not resume and not force:
            raise UnityTrainerException(
                "Previous data from this run ID was found. "
                "Either specify a new run ID, use --resume to resume this run, "
                "or use the --force parameter to overwrite existing data."
            )
    else:
        if resume:
            raise UnityTrainerException(
                "Previous data from this run ID was not found. "
                "Train a new run by removing the --resume flag."
            )

    # Verify init path if specified.
    if init_path is not None:
        if not os.path.isdir(init_path):
            raise UnityTrainerException(
                "Could not initialize from {}. "
                "Make sure models have already been saved with that run ID.".format(
                    init_path
                )
            )


def setup_init_path(
    behaviors: TrainerSettings.DefaultTrainerDict, init_dir: str
) -> None:
    """
    For each behavior, setup full init_path to checkpoint file to initialize policy from
    :param behaviors: mapping from behavior_name to TrainerSettings
    :param init_dir: Path to run-id dir to initialize from
    """
    for behavior_name, ts in behaviors.items():
        if ts.init_path is None:
            # set default if None
            ts.init_path = os.path.join(
                init_dir, behavior_name, DEFAULT_CHECKPOINT_NAME
            )
        elif not os.path.dirname(ts.init_path):
            # update to full path if just the file name
            ts.init_path = os.path.join(init_dir, behavior_name, ts.init_path)
        _validate_init_full_path(ts.init_path)


def _validate_init_full_path(init_file: str) -> None:
    """
    Validate initialization path to be a .pt file
    :param init_file: full path to initialization checkpoint file
    """
    if not (os.path.isfile(init_file) and init_file.endswith(".pt")):
        raise UnityTrainerException(
            f"Could not initialize from {init_file}. File does not exist or is not a `.pt` file."
        )
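
A minimal sketch of the run-directory check (the results path is hypothetical; not part of the upload):

from mlagents.trainers.directory_utils import validate_existing_directories

# Raises UnityTrainerException if "results/my_run" already exists and neither
# resume nor force was requested; passes silently for a fresh run ID.
validate_existing_directories("results/my_run", resume=False, force=False)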
MLPY/Lib/site-packages/mlagents/trainers/env_manager.py
ADDED
@@ -0,0 +1,157 @@
from abc import ABC, abstractmethod

from typing import List, Dict, NamedTuple, Iterable, Tuple
from mlagents_envs.base_env import (
    DecisionSteps,
    TerminalSteps,
    BehaviorSpec,
    BehaviorName,
)
from mlagents_envs.side_channel.stats_side_channel import EnvironmentStats

from mlagents.trainers.policy import Policy
from mlagents.trainers.agent_processor import AgentManager, AgentManagerQueue
from mlagents.trainers.action_info import ActionInfo
from mlagents.trainers.settings import TrainerSettings
from mlagents_envs.logging_util import get_logger

AllStepResult = Dict[BehaviorName, Tuple[DecisionSteps, TerminalSteps]]
AllGroupSpec = Dict[BehaviorName, BehaviorSpec]

logger = get_logger(__name__)


class EnvironmentStep(NamedTuple):
    current_all_step_result: AllStepResult
    worker_id: int
    brain_name_to_action_info: Dict[BehaviorName, ActionInfo]
    environment_stats: EnvironmentStats

    @property
    def name_behavior_ids(self) -> Iterable[BehaviorName]:
        return self.current_all_step_result.keys()

    @staticmethod
    def empty(worker_id: int) -> "EnvironmentStep":
        return EnvironmentStep({}, worker_id, {}, {})


class EnvManager(ABC):
    def __init__(self):
        self.policies: Dict[BehaviorName, Policy] = {}
        self.agent_managers: Dict[BehaviorName, AgentManager] = {}
        self.first_step_infos: List[EnvironmentStep] = []

    def set_policy(self, brain_name: BehaviorName, policy: Policy) -> None:
        self.policies[brain_name] = policy
        if brain_name in self.agent_managers:
            self.agent_managers[brain_name].policy = policy

    def set_agent_manager(
        self, brain_name: BehaviorName, manager: AgentManager
    ) -> None:
        self.agent_managers[brain_name] = manager

    @abstractmethod
    def _step(self) -> List[EnvironmentStep]:
        pass

    @abstractmethod
    def _reset_env(self, config: Dict = None) -> List[EnvironmentStep]:
        pass

    def reset(self, config: Dict = None) -> int:
        for manager in self.agent_managers.values():
            manager.end_episode()
        # Save the first step infos, after the reset.
        # They will be processed on the first advance().
        self.first_step_infos = self._reset_env(config)
        return len(self.first_step_infos)

    @abstractmethod
    def set_env_parameters(self, config: Dict = None) -> None:
        """
        Sends environment parameter settings to C# via the
        EnvironmentParametersSideChannel.
        :param config: Dict of environment parameter keys and values
        """
        pass

    def on_training_started(
        self, behavior_name: str, trainer_settings: TrainerSettings
    ) -> None:
        """
        Handle training starting for a new behavior type. Generally nothing is necessary here.
        :param behavior_name:
        :param trainer_settings:
        :return:
        """
        pass

    @property
    @abstractmethod
    def training_behaviors(self) -> Dict[BehaviorName, BehaviorSpec]:
        pass

    @abstractmethod
    def close(self):
        pass

    def get_steps(self) -> List[EnvironmentStep]:
        """
        Updates the policies, steps the environments, and returns the step information from the environments.
        Calling code should pass the returned EnvironmentSteps to process_steps() after calling this.
        :return: The list of EnvironmentSteps
        """
        # If we had just reset, process the first EnvironmentSteps.
        # Note that we do it here instead of in reset() so that on the very first reset(),
        # we can create the needed AgentManagers before calling advance() and processing the EnvironmentSteps.
        if self.first_step_infos:
            self._process_step_infos(self.first_step_infos)
            self.first_step_infos = []
        # Get new policies if found. Always get the latest policy.
        for brain_name in self.agent_managers.keys():
            _policy = None
            try:
                # We make sure to empty the policy queue before continuing to produce steps.
                # This halts the trainers until the policy queue is empty.
                while True:
                    _policy = self.agent_managers[brain_name].policy_queue.get_nowait()
            except AgentManagerQueue.Empty:
                if _policy is not None:
                    self.set_policy(brain_name, _policy)
        # Step the environments
        new_step_infos = self._step()
        return new_step_infos

    def process_steps(self, new_step_infos: List[EnvironmentStep]) -> int:
        # Add to AgentProcessor
        num_step_infos = self._process_step_infos(new_step_infos)
        return num_step_infos

    def _process_step_infos(self, step_infos: List[EnvironmentStep]) -> int:
        for step_info in step_infos:
            for name_behavior_id in step_info.name_behavior_ids:
                if name_behavior_id not in self.agent_managers:
                    logger.warning(
                        "Agent manager was not created for behavior id {}.".format(
                            name_behavior_id
                        )
                    )
                    continue
                decision_steps, terminal_steps = step_info.current_all_step_result[
                    name_behavior_id
                ]
                self.agent_managers[name_behavior_id].add_experiences(
                    decision_steps,
                    terminal_steps,
                    step_info.worker_id,
                    step_info.brain_name_to_action_info.get(
                        name_behavior_id, ActionInfo.empty()
                    ),
                )

                self.agent_managers[name_behavior_id].record_environment_stats(
                    step_info.environment_stats, step_info.worker_id
                )
        return len(step_infos)
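
A simplified sketch of the stepping loop a concrete EnvManager subclass is driven by (the driver function below is illustrative only; `env` stands for any subclass that has already been reset and had its AgentManagers registered):

def run_steps(env, num_iterations: int) -> int:
    """Step the environments a fixed number of times and report how many step infos were processed."""
    total = 0
    for _ in range(num_iterations):
        step_infos = env.get_steps()             # drain policy queues, then step the environments
        total += env.process_steps(step_infos)   # hand the results to the AgentManagers
    return total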
MLPY/Lib/site-packages/mlagents/trainers/environment_parameter_manager.py
ADDED
@@ -0,0 +1,186 @@
from typing import Dict, List, Tuple, Optional
|
2 |
+
from mlagents.trainers.settings import (
|
3 |
+
EnvironmentParameterSettings,
|
4 |
+
ParameterRandomizationSettings,
|
5 |
+
)
|
6 |
+
from collections import defaultdict
|
7 |
+
from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType
|
8 |
+
|
9 |
+
from mlagents_envs.logging_util import get_logger
|
10 |
+
|
11 |
+
logger = get_logger(__name__)
|
12 |
+
|
13 |
+
|
14 |
+
class EnvironmentParameterManager:
|
15 |
+
def __init__(
|
16 |
+
self,
|
17 |
+
settings: Optional[Dict[str, EnvironmentParameterSettings]] = None,
|
18 |
+
run_seed: int = -1,
|
19 |
+
restore: bool = False,
|
20 |
+
):
|
21 |
+
"""
|
22 |
+
EnvironmentParameterManager manages all the environment parameters of a training
|
23 |
+
session. It determines when parameters should change and gives access to the
|
24 |
+
current sampler of each parameter.
|
25 |
+
:param settings: A dictionary from environment parameter to
|
26 |
+
EnvironmentParameterSettings.
|
27 |
+
:param run_seed: When the seed is not provided for an environment parameter,
|
28 |
+
this seed will be used instead.
|
29 |
+
:param restore: If true, the EnvironmentParameterManager will use the
|
30 |
+
GlobalTrainingStatus to try and reload the lesson status of each environment
|
31 |
+
parameter.
|
32 |
+
"""
|
33 |
+
if settings is None:
|
34 |
+
settings = {}
|
35 |
+
self._dict_settings = settings
|
36 |
+
for parameter_name in self._dict_settings.keys():
|
37 |
+
initial_lesson = GlobalTrainingStatus.get_parameter_state(
|
38 |
+
parameter_name, StatusType.LESSON_NUM
|
39 |
+
)
|
40 |
+
if initial_lesson is None or not restore:
|
41 |
+
GlobalTrainingStatus.set_parameter_state(
|
42 |
+
parameter_name, StatusType.LESSON_NUM, 0
|
43 |
+
)
|
44 |
+
self._smoothed_values: Dict[str, float] = defaultdict(float)
|
45 |
+
for key in self._dict_settings.keys():
|
46 |
+
self._smoothed_values[key] = 0.0
|
47 |
+
# Update the seeds of the samplers
|
48 |
+
self._set_sampler_seeds(run_seed)
|
49 |
+
|
50 |
+
def _set_sampler_seeds(self, seed):
|
51 |
+
"""
|
52 |
+
Sets the seeds for the samplers (if no seed was already present). Note that
|
53 |
+
using the provided seed.
|
54 |
+
"""
|
55 |
+
offset = 0
|
56 |
+
for settings in self._dict_settings.values():
|
57 |
+
for lesson in settings.curriculum:
|
58 |
+
if lesson.value.seed == -1:
|
59 |
+
lesson.value.seed = seed + offset
|
60 |
+
offset += 1
|
61 |
+
|
62 |
+
def get_minimum_reward_buffer_size(self, behavior_name: str) -> int:
|
63 |
+
"""
|
64 |
+
Calculates the minimum size of the reward buffer a behavior must use. This
|
65 |
+
method uses the 'min_lesson_length' sampler_parameter to determine this value.
|
66 |
+
:param behavior_name: The name of the behavior the minimum reward buffer
|
67 |
+
size corresponds to.
|
68 |
+
"""
|
69 |
+
result = 1
|
70 |
+
for settings in self._dict_settings.values():
|
71 |
+
for lesson in settings.curriculum:
|
72 |
+
if lesson.completion_criteria is not None:
|
73 |
+
if lesson.completion_criteria.behavior == behavior_name:
|
74 |
+
result = max(
|
75 |
+
result, lesson.completion_criteria.min_lesson_length
|
76 |
+
)
|
77 |
+
return result
|
78 |
+
|
79 |
+
+    def get_current_samplers(self) -> Dict[str, ParameterRandomizationSettings]:
+        """
+        Creates a dictionary from environment parameter name to their corresponding
+        ParameterRandomizationSettings. If curriculum is used, the
+        ParameterRandomizationSettings corresponds to the sampler of the current lesson.
+        """
+        samplers: Dict[str, ParameterRandomizationSettings] = {}
+        for param_name, settings in self._dict_settings.items():
+            lesson_num = GlobalTrainingStatus.get_parameter_state(
+                param_name, StatusType.LESSON_NUM
+            )
+            lesson = settings.curriculum[lesson_num]
+            samplers[param_name] = lesson.value
+        return samplers
+
+    def get_current_lesson_number(self) -> Dict[str, int]:
+        """
+        Creates a dictionary from environment parameter to the current lesson number.
+        If not using curriculum, this number is always 0 for that environment parameter.
+        """
+        result: Dict[str, int] = {}
+        for parameter_name in self._dict_settings.keys():
+            result[parameter_name] = GlobalTrainingStatus.get_parameter_state(
+                parameter_name, StatusType.LESSON_NUM
+            )
+        return result
+
+    def log_current_lesson(self, parameter_name: Optional[str] = None) -> None:
+        """
+        Logs the current lesson number and sampler value of the parameter with name
+        parameter_name. If no parameter_name is provided, the values and lesson
+        numbers of all parameters will be displayed.
+        """
+        if parameter_name is not None:
+            settings = self._dict_settings[parameter_name]
+            lesson_number = GlobalTrainingStatus.get_parameter_state(
+                parameter_name, StatusType.LESSON_NUM
+            )
+            lesson_name = settings.curriculum[lesson_number].name
+            lesson_value = settings.curriculum[lesson_number].value
+            logger.info(
+                f"Parameter '{parameter_name}' is in lesson '{lesson_name}' "
+                f"and has value '{lesson_value}'."
+            )
+        else:
+            for parameter_name, settings in self._dict_settings.items():
+                lesson_number = GlobalTrainingStatus.get_parameter_state(
+                    parameter_name, StatusType.LESSON_NUM
+                )
+                lesson_name = settings.curriculum[lesson_number].name
+                lesson_value = settings.curriculum[lesson_number].value
+                logger.info(
+                    f"Parameter '{parameter_name}' is in lesson '{lesson_name}' "
+                    f"and has value '{lesson_value}'."
+                )
+
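Illustrative aside: get_current_samplers and get_current_lesson_number both key their results by environment parameter name. The sketch below shows the kind of dictionaries a caller might receive; the parameter names and sampler descriptions are hypothetical, and the real sampler values are ParameterRandomizationSettings objects rather than strings.

# Hypothetical return values; real keys come from the environment_parameters
# section of the trainer configuration.
current_lessons = {"wall_height": 2, "num_obstacles": 0}
current_samplers = {
    "wall_height": "uniform sampler over [4.0, 8.0]",
    "num_obstacles": "constant value 1.0",
}
for name, lesson in current_lessons.items():
    print(f"Parameter '{name}' is in lesson {lesson} with sampler: {current_samplers[name]}")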
+    def update_lessons(
+        self,
+        trainer_steps: Dict[str, int],
+        trainer_max_steps: Dict[str, int],
+        trainer_reward_buffer: Dict[str, List[float]],
+    ) -> Tuple[bool, bool]:
+        """
+        Given progress metrics, calculates if at least one environment parameter is
+        in a new lesson and if at least one environment parameter requires the env
+        to reset.
+        :param trainer_steps: A dictionary from behavior_name to the number of training
+        steps this behavior's trainer has performed.
+        :param trainer_max_steps: A dictionary from behavior_name to the maximum number
+        of training steps this behavior's trainer will perform.
+        :param trainer_reward_buffer: A dictionary from behavior_name to the list of
+        the most recent episode returns for this behavior's trainer.
+        :returns: A tuple of two booleans: (True if any lesson has changed, True if
+        the environment needs to reset)
+        """
+        must_reset = False
+        updated = False
+        for param_name, settings in self._dict_settings.items():
+            lesson_num = GlobalTrainingStatus.get_parameter_state(
+                param_name, StatusType.LESSON_NUM
+            )
+            next_lesson_num = lesson_num + 1
+            lesson = settings.curriculum[lesson_num]
+            if (
+                lesson.completion_criteria is not None
+                and len(settings.curriculum) > next_lesson_num
+            ):
+                behavior_to_consider = lesson.completion_criteria.behavior
+                if behavior_to_consider in trainer_steps:
+                    (
+                        must_increment,
+                        new_smoothing,
+                    ) = lesson.completion_criteria.need_increment(
+                        float(trainer_steps[behavior_to_consider])
+                        / float(trainer_max_steps[behavior_to_consider]),
+                        trainer_reward_buffer[behavior_to_consider],
+                        self._smoothed_values[param_name],
+                    )
+                    self._smoothed_values[param_name] = new_smoothing
+                    if must_increment:
+                        GlobalTrainingStatus.set_parameter_state(
+                            param_name, StatusType.LESSON_NUM, next_lesson_num
+                        )
+                        self.log_current_lesson(param_name)
+                        updated = True
+                        if lesson.completion_criteria.require_reset:
+                            must_reset = True
+        return updated, must_reset
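Illustrative aside: update_lessons passes the ratio trainer_steps / trainer_max_steps, the recent episode returns, and a smoothed value into each lesson's completion_criteria.need_increment, and advances the lesson when that check passes. The sketch below mimics the decision with plain dicts and a hypothetical stand-in for need_increment; the real criteria are typically configured through fields such as measure, threshold, and min_lesson_length in the trainer YAML.

# Hypothetical progress metrics for one behavior; real values come from the trainers.
trainer_steps = {"WallJump": 150_000}
trainer_max_steps = {"WallJump": 500_000}
trainer_reward_buffer = {"WallJump": [0.8, 0.9, 1.0]}

behavior = "WallJump"
progress = float(trainer_steps[behavior]) / float(trainer_max_steps[behavior])

# Stand-in for completion_criteria.need_increment: advance once 25% of the step
# budget is used and the mean recent return exceeds 0.75 (thresholds are made up).
returns = trainer_reward_buffer[behavior]
must_increment = progress >= 0.25 and sum(returns) / len(returns) > 0.75
print(progress, must_increment)  # 0.3 True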
MLPY/Lib/site-packages/mlagents/trainers/exception.py
ADDED
@@ -0,0 +1,75 @@
+"""
+Contains exceptions for the trainers package.
+"""
+
+
+class TrainerError(Exception):
+    """
+    Any error related to the trainers in the ML-Agents Toolkit.
+    """
+
+    pass
+
+
+class TrainerConfigError(Exception):
+    """
+    Any error related to the configuration of trainers in the ML-Agents Toolkit.
+    """
+
+    pass
+
+
+class TrainerConfigWarning(Warning):
+    """
+    Any warning related to the configuration of trainers in the ML-Agents Toolkit.
+    """
+
+    pass
+
+
+class CurriculumError(TrainerError):
+    """
+    Any error related to training with a curriculum.
+    """
+
+    pass
+
+
+class CurriculumLoadingError(CurriculumError):
+    """
+    Any error related to loading the Curriculum config file.
+    """
+
+    pass
+
+
+class CurriculumConfigError(CurriculumError):
+    """
+    Any error related to processing the Curriculum config file.
+    """
+
+    pass
+
+
+class MetaCurriculumError(TrainerError):
+    """
+    Any error related to the configuration of a metacurriculum.
+    """
+
+    pass
+
+
+class SamplerException(TrainerError):
+    """
+    Related to errors with the sampler actions.
+    """
+
+    pass
+
+
+class UnityTrainerException(TrainerError):
+    """
+    Related to errors with the Trainer.
+    """
+
+    pass
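Illustrative aside: the classes above form a small hierarchy. CurriculumError and its subclasses, MetaCurriculumError, SamplerException, and UnityTrainerException all derive from TrainerError, while TrainerConfigError and TrainerConfigWarning derive directly from Exception and Warning. A usage sketch with a hypothetical validation helper:

from mlagents.trainers.exception import CurriculumConfigError, TrainerError

def validate_curriculum(lessons):
    # Hypothetical check: a curriculum with no lessons is treated as a config error.
    if not lessons:
        raise CurriculumConfigError("A curriculum must define at least one lesson.")

try:
    validate_curriculum([])
except TrainerError as err:  # CurriculumConfigError is caught via its TrainerError base
    print(f"Invalid curriculum: {err}")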
MLPY/Lib/site-packages/mlagents/trainers/ghost/__init__.py
ADDED
File without changes