# mcuu-table-2-h5mclbzr / config.yaml
# Uploaded by ahalev with huggingface_hub (commit 1b2de93, verified).
---
# Experiment configuration: RL algorithm settings (`algo`), run context and
# logging (`context`), environment (`env`), and microgrid setup (`microgrid`).
#
# NOTE(review): the nesting below was reconstructed after the indentation was
# lost. It follows (a) the sorted key order within each mapping and (b) the
# dotted paths listed under `context.log_dir.from_keys`. Placements that this
# evidence does not fully determine carry a NOTE(review) comment; confirm them
# against the consumer's schema before relying on this file.

algo:
  ddpg:
    params:
      target_update_tau: 0.01
    policy:
      exploration:
        sigma: 0.3
        theta: 0.15
  deterministic_params:
    buffer_batch_size: 32
    min_buffer_size: 10000
    n_train_steps: 500
    qf_lr: 0.0001
    steps_per_epoch: 1
  dqn:
    params:
      clip_gradient: 10
      deterministic_eval: true
      double_q: false
      target_update_freq: 2
    policy:
      exploration:
        decay_ratio: 0.5
        max_epsilon: 1.0
        min_epsilon: 0.05
  general_params:
    discount: 0.99
  # NOTE(review): `package` could also sort validly under `general_params`;
  # placed at the `algo` level — confirm.
  package: garage
  policy:
    hidden_sizes:
      - 128
      - 128
    pretrained_policy: null
  ppo:
    params:
      center_adv: false
    # Placement fixed by the `algo.ppo.tanhnormal` path in from_keys.
    tanhnormal: false
  pretrain:
    additional_config: null
    algo_to_pretrain: null
    params:
      episodes_per_batch: 10
      loss: log_prob
      policy_lr: 0.01
    # NOTE(review): `pretrain_algo` could also sort validly inside `params`;
    # placed directly under `pretrain` — confirm.
    pretrain_algo: rbc
  # NOTE(review): `replay_buffer` could also sort validly under `pretrain`;
  # placed at the `algo` level — confirm.
  replay_buffer:
    buffer_size: 200000
  rnd:
    batch_size: 64
    bound_reward_weight: cosine
    bound_reward_weight_initial_ratio: 0.999999
    bound_reward_weight_transient_epochs: 10
    hidden_sizes:
      - 64
      - 64
    intrinsic_reward_weight: 0.0001
    n_train_steps: 32
    output_dim: 128
    predictor_lr: 0.001
    standardize_extrinsic_reward: true
    standardize_intrinsic_reward: true
  sampler:
    n_workers: 16
    type: ray
  train:
    batch_size: 50000
    n_epochs: 100
    steps_per_epoch: 32
  # NOTE(review): this `type` could also sort validly under `train`;
  # placed at the `algo` level — confirm.
  type: ppo

context:
  disable_logging: false
  experiment_name: null
  log_dir:
    # Dotted config paths; presumably used to derive the log directory name
    # under `parent` — verify against the consumer.
    from_keys:
      - microgrid.config.scenario
      - microgrid.methods.set_forecaster.forecaster
      - microgrid.methods.set_module_attrs.battery_transition_model
      - context.seed
      - env.domain_randomization.noise_std
      - algo.ppo.tanhnormal
      - algo.rnd.intrinsic_reward_weight
    parent: /home/ahalev/data/GridRL/paper_experiments
    use_existing_dir: false
  seed: 1
  snapshot_gap: 10
  verbose: 0
  wandb:
    api_key_file: ../../local/wandb_api_key.txt
    group: null
    log_density: 1
    plot_baseline:
      - mpc
      - rbc
    username: ahalev

env:
  cls: DiscreteMicrogridEnv
  domain_randomization:
    noise_std: 0.01
    relative_noise: true
  forced_genset: null
  net_load:
    slack_module: grid
    use: true
  observation_keys:
    - soc
    - net_load
    - import_price_current
    - import_price_forecast_0
    - import_price_forecast_1
    - import_price_forecast_2
    - import_price_forecast_3
    - import_price_forecast_4

microgrid:
  attributes:
    # Custom application tag: the consumer's YAML loader must register a
    # constructor for `!BaselineShaper`, or loading will fail.
    reward_shaping_func: !BaselineShaper
      baseline_module: false
      module:
        - grid
        - 0
  config:
    scenario: 6
  methods:
    set_forecaster:
      forecast_horizon: 23
      forecaster: 0.0
      forecaster_increase_uncertainty: true
      forecaster_relative_noise: true
    set_module_attrs:
      battery_transition_model: null
      # NOTE(review): `normalized_action_bounds` could also sort validly
      # directly under `microgrid`; placed here — confirm.
      normalized_action_bounds:
        - 0.0
        - 1.0
  # NOTE(review): `trajectory` could also sort validly under `methods` or
  # `set_module_attrs`; placed directly under `microgrid` — confirm.
  trajectory:
    evaluate:
      final_step: -1
      initial_step: 5840
      trajectory_func: null
    train:
      final_step: 5840
      initial_step: 0
      # Custom application tag; needs a registered constructor, as above.
      trajectory_func: !FixedLengthStochasticTrajectory
        trajectory_length: 720

# NOTE(review): placed at the top level (distinct from `context.verbose`);
# sort order would also allow it under `microgrid` or deeper — confirm.
verbose: 1