# No shared defaults are declared; the value is YAML null.
default_settings: null
# Per-behavior trainer configuration. Nesting restored to follow the ML-Agents
# trainer configuration schema; every key and value is unchanged.
behaviors:
  SoccerTwos:
    trainer_type: poca
    hyperparameters:
      batch_size: 2048
      buffer_size: 20480
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: constant
      beta_schedule: constant
      epsilon_schedule: constant
    checkpoint_interval: 500000
    # Network settings for the behavior itself (512 hidden units).
    network_settings:
      normalize: false
      hidden_units: 512
      num_layers: 2
      vis_encode_type: simple
      memory: null
      goal_conditioning_type: hyper
      deterministic: false
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
        # Separate, smaller network settings nested under the extrinsic
        # reward signal (128 hidden units).
        network_settings:
          normalize: false
          hidden_units: 128
          num_layers: 2
          vis_encode_type: simple
          memory: null
          goal_conditioning_type: hyper
          deterministic: false
    init_path: null
    keep_checkpoints: 5
    even_checkpoints: false
    max_steps: 50000000
    time_horizon: 1000
    summary_freq: 10000
    threaded: false
    self_play:
      save_steps: 50000
      team_change: 200000
      swap_steps: 2000
      window: 10
      play_against_latest_model_ratio: 0.5
      initial_elo: 1200.0
    behavioral_cloning: null
# Environment executable and launch options. Nesting restored; values unchanged.
env_settings:
  # Relative path: resolved against the directory the trainer is launched from.
  env_path: ./training-envs-executables/linux/SoccerTwos/SoccerTwos.x86_64
  env_args: null
  base_port: 5005
  num_envs: 1
  num_areas: 1
  timeout_wait: 60
  seed: -1
  max_lifetime_restarts: 10
  restarts_rate_limit_n: 1
  restarts_rate_limit_period_s: 60
# Engine settings for the environment process. Nesting restored; values unchanged.
engine_settings:
  width: 84
  height: 84
  quality_level: 5
  time_scale: 20
  target_frame_rate: -1
  capture_frame_rate: 60
  no_graphics: true
# No environment parameters are set; the value is YAML null.
environment_parameters: null
# Run identity and checkpoint handling. Nesting restored; values unchanged.
checkpoint_settings:
  run_id: SoccerTwos1
  initialize_from: null
  load_model: false
  # NOTE(review): resume is true while force is false, so this presumably
  # expects an existing run named SoccerTwos1 under results_dir; confirm.
  resume: true
  force: false
  train_model: false
  inference: false
  results_dir: results
# Torch settings. Nesting restored; the device is left unset (YAML null).
torch_settings:
  device: null
# Top-level debug flag, set to boolean false.
debug: false