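"""Shared dummy trainer configurations, reward-signal fixtures, and
observation-spec helpers for the mlagents.trainers test suite."""
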
import copy
import os
from typing import List, Tuple

import pytest

from mlagents_envs.base_env import DimensionProperty, ObservationSpec, ObservationType
from mlagents.trainers.settings import (
    TrainerSettings,
    GAILSettings,
    CuriositySettings,
    RewardSignalSettings,
    NetworkSettings,
    RewardSignalType,
    ScheduleType,
)
from mlagents.trainers.ppo.trainer import PPOSettings, TRAINER_NAME as PPO_TRAINER_NAME
from mlagents.trainers.sac.trainer import SACSettings, TRAINER_NAME as SAC_TRAINER_NAME
from mlagents.trainers.poca.trainer import (
    POCASettings,
    TRAINER_NAME as POCA_TRAINER_NAME,
)

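# Pre-recorded demonstration files that live next to this module; the GAIL
# fixture below points at the continuous-control demo.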
_TEST_DIR = os.path.dirname(os.path.abspath(__file__))
CONTINUOUS_DEMO_PATH = os.path.join(_TEST_DIR, "test.demo")
DISCRETE_DEMO_PATH = os.path.join(_TEST_DIR, "testdcvis.demo")

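# Deliberately tiny PPO configuration (small network, short run) so that
# tests exercising the trainer finish quickly.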
_PPO_CONFIG = TrainerSettings(
    trainer_type=PPO_TRAINER_NAME,
    hyperparameters=PPOSettings(
        learning_rate=5.0e-3,
        learning_rate_schedule=ScheduleType.CONSTANT,
        batch_size=16,
        buffer_size=64,
    ),
    network_settings=NetworkSettings(num_layers=1, hidden_units=32),
    summary_freq=500,
    max_steps=3000,
    threaded=False,
)

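# SAC counterpart: a small replay buffer and a low initial entropy
# coefficient keep the off-policy tests fast.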
_SAC_CONFIG = TrainerSettings(
    trainer_type=SAC_TRAINER_NAME,
    hyperparameters=SACSettings(
        learning_rate=5.0e-3,
        learning_rate_schedule=ScheduleType.CONSTANT,
        batch_size=8,
        buffer_init_steps=100,
        buffer_size=5000,
        tau=0.01,
        init_entcoef=0.01,
    ),
    network_settings=NetworkSettings(num_layers=1, hidden_units=16),
    summary_freq=100,
    max_steps=1000,
    threaded=False,
)

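# POCA (multi-agent) configuration; mirrors the PPO hyperparameters.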
_POCA_CONFIG = TrainerSettings(
    trainer_type=POCA_TRAINER_NAME,
    hyperparameters=POCASettings(
        learning_rate=5.0e-3,
        learning_rate_schedule=ScheduleType.CONSTANT,
        batch_size=16,
        buffer_size=64,
    ),
    network_settings=NetworkSettings(num_layers=1, hidden_units=32),
    summary_freq=500,
    max_steps=3000,
    threaded=False,
)


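# The helpers below hand out deep copies so individual tests can mutate the
# settings without affecting one another.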
def ppo_dummy_config():
    return copy.deepcopy(_PPO_CONFIG)


def sac_dummy_config():
    return copy.deepcopy(_SAC_CONFIG)


def poca_dummy_config():
    return copy.deepcopy(_POCA_CONFIG)


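# Pytest fixtures yielding single-entry reward-signal configuration dicts.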
@pytest.fixture
def gail_dummy_config():
    return {RewardSignalType.GAIL: GAILSettings(demo_path=CONTINUOUS_DEMO_PATH)}


@pytest.fixture
def curiosity_dummy_config():
    return {RewardSignalType.CURIOSITY: CuriositySettings()}


@pytest.fixture
def extrinsic_dummy_config():
    return {RewardSignalType.EXTRINSIC: RewardSignalSettings()}


def create_observation_specs_with_shapes(
    shapes: List[Tuple[int, ...]]
) -> List[ObservationSpec]:
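    """Build one ObservationSpec per shape, marking rank-2 shapes as
    variable-length observations."""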
    obs_specs: List[ObservationSpec] = []
    for i, shape in enumerate(shapes):
        dim_prop = (DimensionProperty.UNSPECIFIED,) * len(shape)
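        # Rank-2 shapes are treated as variable-length observations:
        # variable first dimension, no property on the second.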
        if len(shape) == 2:
            dim_prop = (DimensionProperty.VARIABLE_SIZE, DimensionProperty.NONE)
        spec = ObservationSpec(
            name=f"observation {i} with shape {shape}",
            shape=shape,
            dimension_property=dim_prop,
            observation_type=ObservationType.DEFAULT,
        )
        obs_specs.append(spec)
    return obs_specs
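
# Example (hypothetical shapes): a vector obs, a variable-length obs, and a
# rank-3 obs would be declared as:
#     create_observation_specs_with_shapes([(8,), (20, 6), (84, 84, 3)])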