GameServerO

Sleeping

File size: 8,121 Bytes

e11e4fe

import copy
import json
import hmac
import hashlib
import sys
from typing import Optional, Dict
import mlagents_envs
import mlagents.trainers
from mlagents import torch_utils
from mlagents.trainers.settings import RewardSignalType
from mlagents_envs.exception import UnityCommunicationException
from mlagents_envs.side_channel import (
    IncomingMessage,
    OutgoingMessage,
    DefaultTrainingAnalyticsSideChannel,
)
from mlagents_envs.communicator_objects.training_analytics_pb2 import (
    TrainingEnvironmentInitialized,
    TrainingBehaviorInitialized,
)
from google.protobuf.any_pb2 import Any

from mlagents.trainers.settings import TrainerSettings, RunOptions


class TrainingAnalyticsSideChannel(DefaultTrainingAnalyticsSideChannel):
    """
    Side channel that sends information about the training to the Unity environment so it can be logged.

    Potentially identifying strings (behavior names, curriculum and lesson names,
    file paths) are replaced by a keyed HMAC-SHA256 hex digest before the
    configuration is serialized to JSON and queued for sending.
    """

    # Key used for the HMAC in `_hash`.
    __vendorKey: str = "unity.ml-agents"

    def __init__(self) -> None:
        # >>> uuid.uuid5(uuid.NAMESPACE_URL, "com.unity.ml-agents/TrainingAnalyticsSideChannel")
        # UUID('b664a4a9-d86f-5a5f-95cb-e8353a7e8356')
        # Use the same uuid as the parent side channel
        super().__init__()
        # Set by `environment_initialized`; read by `_behavior_uses_curriculum`.
        self.run_options: Optional[RunOptions] = None

    @classmethod
    def _hash(cls, data: str) -> str:
        """
        Return the HMAC-SHA256 hex digest of `data`, keyed with the vendor key.

        :param data: The string to hash (encoded as UTF-8 before hashing).
        :return: The hexadecimal digest string.
        """
        return hmac.new(
            cls.__vendorKey.encode("utf-8"), data.encode("utf-8"), hashlib.sha256
        ).hexdigest()

    @classmethod
    def _hash_field(cls, mapping: Dict, key: str) -> None:
        """
        Replace `mapping[key]` in place with its `_hash` digest.

        Nothing happens when the key is absent or its value is None.

        :param mapping: The dictionary to modify in place.
        :param key: The key whose value should be hashed.
        """
        if key in mapping and mapping[key] is not None:
            mapping[key] = cls._hash(mapping[key])

    def on_message_received(self, msg: IncomingMessage) -> None:
        """
        Reject any incoming message; this channel is send-only.

        :param msg: The message received from Unity (unused).
        :raises UnityCommunicationException: Always.
        """
        raise UnityCommunicationException(
            "The TrainingAnalyticsSideChannel received a message from Unity, "
            "this should not have happened."
        )

    @classmethod
    def _sanitize_run_options(cls, config: RunOptions) -> Dict[str, Any]:
        """
        Build a dictionary copy of `config` with potentially identifying strings hashed.

        Hashed values: behavior names and each behavior's "init_path" and
        "demo_path"; environment parameter names, lesson names and the
        completion-criteria behavior of each lesson; and the checkpoint
        settings "initialize_from" and "results_dir".

        :param config: The run options to sanitize. It is not modified; a deep
            copy of `config.as_dict()` is edited instead.
        :return: The sanitized dictionary.
        """
        res = copy.deepcopy(config.as_dict())

        # Filter potentially PII behavior names
        if "behaviors" in res and res["behaviors"]:
            res["behaviors"] = {cls._hash(k): v for (k, v) in res["behaviors"].items()}
            for behavior in res["behaviors"].values():
                cls._hash_field(behavior, "init_path")
                cls._hash_field(behavior, "demo_path")

        # Filter potentially PII curriculum and behavior names from Checkpoint Settings
        if "environment_parameters" in res and res["environment_parameters"]:
            res["environment_parameters"] = {
                cls._hash(k): v for (k, v) in res["environment_parameters"].items()
            }
            for curriculum in res["environment_parameters"].values():
                updated_lessons = []
                for lesson in curriculum["curriculum"]:
                    new_lesson = copy.deepcopy(lesson)
                    if "name" in lesson:
                        new_lesson["name"] = cls._hash(lesson["name"])
                    if (
                        "completion_criteria" in lesson
                        and lesson["completion_criteria"] is not None
                    ):
                        new_lesson["completion_criteria"]["behavior"] = cls._hash(
                            new_lesson["completion_criteria"]["behavior"]
                        )
                    updated_lessons.append(new_lesson)
                curriculum["curriculum"] = updated_lessons

        # Filter potentially PII filenames from Checkpoint Settings
        if "checkpoint_settings" in res and res["checkpoint_settings"] is not None:
            checkpoint_settings = res["checkpoint_settings"]
            cls._hash_field(checkpoint_settings, "initialize_from")
            # Must go through the keyed `_hash`, not the builtin `hash()`: the
            # builtin returns a per-process salted int rather than a stable digest.
            cls._hash_field(checkpoint_settings, "results_dir")

        return res

    def environment_initialized(self, run_options: RunOptions) -> None:
        """
        Queue a TrainingEnvironmentInitialized message describing this run.

        The run options are stored on the instance, and a sanitized copy is
        attached to the message as a JSON string.

        :param run_options: The run options for the training session.
        """
        self.run_options = run_options
        # Tuple of (major, minor, patch)
        vi = sys.version_info
        env_params = run_options.environment_parameters
        sanitized_run_options = self._sanitize_run_options(run_options)

        msg = TrainingEnvironmentInitialized(
            python_version=f"{vi[0]}.{vi[1]}.{vi[2]}",
            mlagents_version=mlagents.trainers.__version__,
            mlagents_envs_version=mlagents_envs.__version__,
            torch_version=torch_utils.torch.__version__,
            torch_device_type=torch_utils.default_device().type,
            num_envs=run_options.env_settings.num_envs,
            num_environment_parameters=len(env_params) if env_params else 0,
            run_options=json.dumps(sanitized_run_options),
        )

        any_message = Any()
        any_message.Pack(msg)

        env_init_msg = OutgoingMessage()
        env_init_msg.set_raw_bytes(any_message.SerializeToString())
        super().queue_message_to_send(env_init_msg)

    @classmethod
    def _sanitize_trainer_settings(cls, config: TrainerSettings) -> Dict[str, Any]:
        """
        Build a dictionary copy of `config` with "init_path" and "demo_path" hashed.

        :param config: The trainer settings to sanitize. It is not modified; a
            deep copy of `config.as_dict()` is edited instead.
        :return: The sanitized dictionary.
        """
        config_dict = copy.deepcopy(config.as_dict())
        cls._hash_field(config_dict, "init_path")
        cls._hash_field(config_dict, "demo_path")
        return config_dict

    def training_started(self, behavior_name: str, config: TrainerSettings) -> None:
        """
        Queue a TrainingBehaviorInitialized message for one behavior.

        The behavior name is hashed, and a sanitized copy of the trainer
        settings is attached to the message as a JSON string.

        :param behavior_name: The name of the behavior whose training is starting.
        :param config: The trainer settings used for that behavior.
        """
        raw_config = self._sanitize_trainer_settings(config)
        msg = TrainingBehaviorInitialized(
            behavior_name=self._hash(behavior_name),
            trainer_type=config.trainer_type,
            extrinsic_reward_enabled=(
                RewardSignalType.EXTRINSIC in config.reward_signals
            ),
            gail_reward_enabled=(RewardSignalType.GAIL in config.reward_signals),
            curiosity_reward_enabled=(
                RewardSignalType.CURIOSITY in config.reward_signals
            ),
            rnd_reward_enabled=(RewardSignalType.RND in config.reward_signals),
            behavioral_cloning_enabled=config.behavioral_cloning is not None,
            recurrent_enabled=config.network_settings.memory is not None,
            visual_encoder=config.network_settings.vis_encode_type.value,
            num_network_layers=config.network_settings.num_layers,
            num_network_hidden_units=config.network_settings.hidden_units,
            trainer_threaded=config.threaded,
            self_play_enabled=config.self_play is not None,
            curriculum_enabled=self._behavior_uses_curriculum(behavior_name),
            config=json.dumps(raw_config),
        )

        any_message = Any()
        any_message.Pack(msg)

        training_start_msg = OutgoingMessage()
        training_start_msg.set_raw_bytes(any_message.SerializeToString())

        super().queue_message_to_send(training_start_msg)

    def _behavior_uses_curriculum(self, behavior_name: str) -> bool:
        """
        Report whether any lesson's completion criteria references `behavior_name`.

        :param behavior_name: The (unhashed) behavior name to look for.
        :return: True if a lesson in the stored run options has completion
            criteria whose behavior equals `behavior_name`; False otherwise,
            including when no run options or environment parameters are set.
        """
        if not self.run_options or not self.run_options.environment_parameters:
            return False

        for param_settings in self.run_options.environment_parameters.values():
            for lesson in param_settings.curriculum:
                cc = lesson.completion_criteria
                if cc and cc.behavior == behavior_name:
                    return True

        return False