# Unity ML-Agents trainer configuration for the "SoccerTwos" behavior.
# Structure follows the ML-Agents training configuration schema:
#   behaviors -> <behavior name> -> trainer settings and sub-sections.
# NOTE(review): nesting was reconstructed from the schema; confirm it matches
# the ML-Agents release this file is used with.
behaviors:
  SoccerTwos:
    # Multi-agent trainer (MA-POCA); accepts the PPO-style hyperparameters below.
    trainer_type: poca
    hyperparameters:
      batch_size: 2048
      # 50 x batch_size, i.e. the buffer is a whole multiple of the batch size.
      buffer_size: 102400
      learning_rate: 0.0005
      # Entropy regularization strength.
      beta: 0.005
      # Clipping range for the policy update.
      epsilon: 0.2
      # GAE lambda.
      lambd: 0.95
      # Passes over the buffer per policy update.
      num_epoch: 5
      # Learning rate decays linearly over max_steps.
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 384
      num_layers: 2
      # Only relevant when the behavior has visual observations.
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        # Discount factor for environment rewards.
        gamma: 0.97
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 10000000
    # Steps of experience collected per agent before being added to the buffer.
    time_horizon: 1512
    summary_freq: 20000
    self_play:
      # Trainer steps between saved policy snapshots.
      save_steps: 75000
      # Trainer steps between switches of the learning team.
      team_change: 300000
      # Ghost steps between swaps of the opponent's policy snapshot.
      swap_steps: 30000
      # Number of past snapshots kept in the opponent sampling pool.
      window: 17
      # Probability of facing the latest policy instead of a past snapshot.
      play_against_latest_model_ratio: 0.6
      initial_elo: 1200.0