File size: 2,655 Bytes
4d455b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# Seed for the run (presumably used to seed RNGs — confirm against the config loader).
seed: 5227
# Environment settings (presumably the training environments — confirm against the loader).
# `eval_env` carries the same keys and differs from this section only in
# num_envs, max_episode_steps and info_on_video.
env:
  env_id: PickSubtaskTrain-v0
  # Same value as algo.num_envs.
  num_envs: 63
  # Same value as algo.num_steps.
  max_episode_steps: 100
  make_env: true
  continuous_task: true
  cat_state: true
  cat_pixels: false
  frame_stack: 3
  stationary_base: false
  stationary_torso: false
  stationary_head: true
  # Relative path; what it is resolved against is not specified in this file.
  task_plan_fp: task_plans/tidy_house/pick/train/005_tomato_soup_can.json
  # NOTE(review): absolute, machine-specific path — will presumably need adjusting on other hosts.
  spawn_data_fp: /arshukla-fast-vol-1/ManiSkill/data/scene_datasets/replica_cad_dataset/rearrange/spawn_data/tidy_house/pick/train/spawn_data.pt
  # All video-related options are disabled for this section.
  record_video: false
  debug_video: false
  debug_video_gen: false
  save_video_freq: null
  info_on_video: false
  extra_stat_keys: []
  # Nested mapping; presumably forwarded as keyword arguments to the environment — confirm.
  env_kwargs:
    robot_force_mult: 0.001
    robot_force_penalty_min: 0.2
    target_randomization: false
# Evaluation environment settings. Same keys as `env`; the values differ only in
# num_envs (189 vs 63), max_episode_steps (200 vs 100) and info_on_video (true vs false).
# NOTE(review): env_id, task_plan_fp and spawn_data_fp are the same train-split values
# used by `env`, so evaluation appears to run on the training data — confirm this is intended.
eval_env:
  env_id: PickSubtaskTrain-v0
  # Same value as algo.num_eval_envs and algo.eval_episodes.
  num_envs: 189
  max_episode_steps: 200
  make_env: true
  continuous_task: true
  cat_state: true
  cat_pixels: false
  frame_stack: 3
  stationary_base: false
  stationary_torso: false
  stationary_head: true
  # Relative path; what it is resolved against is not specified in this file.
  task_plan_fp: task_plans/tidy_house/pick/train/005_tomato_soup_can.json
  # NOTE(review): absolute, machine-specific path — will presumably need adjusting on other hosts.
  spawn_data_fp: /arshukla-fast-vol-1/ManiSkill/data/scene_datasets/replica_cad_dataset/rearrange/spawn_data/tidy_house/pick/train/spawn_data.pt
  record_video: false
  debug_video: false
  debug_video_gen: false
  save_video_freq: null
  # NOTE(review): enabled while record_video is false; presumably has no effect until
  # video recording is turned on — confirm.
  info_on_video: true
  extra_stat_keys: []
  # Nested mapping; presumably forwarded as keyword arguments to the environment — confirm.
  env_kwargs:
    robot_force_mult: 0.001
    robot_force_penalty_min: 0.2
    target_randomization: false
# Algorithm settings for the `sac` learner: run length, encoder, critic, actor,
# temperature, and logging/evaluation cadence.
algo:
  name: sac

  # --- Run length and replay ---
  # 995400 = 63 * 15800, i.e. a whole multiple of num_envs.
  replay_buffer_capacity: 995400
  total_timesteps: 50000000
  num_steps: 100
  init_steps: 5000
  batch_size: 512

  # --- Encoder ---
  critic_encoder_tau: 0.005
  # The three cnn_* lists have four entries each (presumably one per conv layer — confirm).
  cnn_features: [32, 64, 128, 256]
  cnn_filters: [3, 3, 3, 3]
  cnn_strides: [2, 2, 2, 2]
  cnn_padding: valid
  encoder_pixels_feature_dim: 50
  encoder_state_feature_dim: 50
  detach_encoder: false

  # --- Critic ---
  critic_hidden_dims: [256, 256, 256]
  critic_lr: 0.0003
  critic_layer_norm: true
  critic_dropout: null
  critic_beta: 0.9
  critic_tau: 0.005
  critic_target_update_freq: 2

  # --- Actor ---
  actor_hidden_dims: [256, 256, 256]
  actor_lr: 0.0003
  actor_beta: 0.9
  actor_log_std_min: -20
  actor_log_std_max: 2
  actor_update_freq: 2

  # --- Discount and temperature ---
  gamma: 0.9
  init_temperature: 0.1
  alpha_lr: 0.0003
  alpha_beta: 0.9

  # --- Logging, checkpointing, evaluation ---
  log_freq: 10000
  save_freq: 100000
  eval_freq: 100000
  torch_deterministic: true
  save_backup_ckpts: false
  # Same value as eval_env.num_envs.
  eval_episodes: 189

  # --- Values mirroring the env sections ---
  # Same value as env.num_envs.
  num_envs: 63
  # Same value as eval_env.num_envs.
  num_eval_envs: 189
  # Equals ceil(total_timesteps / num_envs) = ceil(50000000 / 63); presumably derived — confirm.
  num_iterations: 793651
# Logging settings for this experiment.
logger:
  workspace: mshab_exps
  exp_name: rcad-tidy_house-pick-005_tomato_soup_can
  # NOTE(review): presumably clears existing output for this experiment before the run —
  # confirm before reusing exp_name.
  clear_out: true
  # TensorBoard logging is on; Weights & Biases logging is off.
  tensorboard: true
  wandb: false
# Policy checkpoint file (relative path; what it is resolved against is not specified here).
model_ckpt: mshab_checkpoints/rl/tidy_house/pick/005_tomato_soup_can/policy.pt