|
--- |
|
tags: |
|
- reinforcement-learning |
|
- atari-alien |
|
- atari-amidar |
|
- atari-assault |
|
- atari-asterix |
|
- atari-asteroids |
|
- atari-atlantis |
|
- atari-bankheist |
|
- atari-battlezone |
|
- atari-beamrider |
|
- atari-berzerk |
|
- atari-bowling |
|
- atari-boxing |
|
- atari-breakout |
|
- atari-centipede |
|
- atari-choppercommand |
|
- atari-crazyclimber |
|
- atari-defender |
|
- atari-demonattack |
|
- atari-doubledunk |
|
- atari-enduro |
|
- atari-fishingderby |
|
- atari-freeway |
|
- atari-frostbite |
|
- atari-gopher |
|
- atari-gravitar |
|
- atari-hero |
|
- atari-icehockey |
|
- atari-jamesbond |
|
- atari-kangaroo |
|
- atari-krull |
|
- atari-kungfumaster |
|
- atari-montezumarevenge |
|
- atari-mspacman |
|
- atari-namethisgame |
|
- atari-phoenix |
|
- atari-pitfall |
|
- atari-pong |
|
- atari-privateeye |
|
- atari-qbert |
|
- atari-riverraid |
|
- atari-roadrunner |
|
- atari-robotank |
|
- atari-seaquest |
|
- atari-skiing |
|
- atari-solaris |
|
- atari-spaceinvaders |
|
- atari-stargunner |
|
- atari-surround |
|
- atari-tennis |
|
- atari-timepilot |
|
- atari-tutankham |
|
- atari-upndown |
|
- atari-venture |
|
- atari-videopinball |
|
- atari-wizardofwor |
|
- atari-yarsrevenge |
|
- atari-zaxxon |
|
- babyai-action-obj-door |
|
- babyai-blocked-unlock-pickup |
|
- babyai-boss-level-no-unlock |
|
- babyai-boss-level |
|
- babyai-find-obj-s5 |
|
- babyai-go-to-door |
|
- babyai-go-to-imp-unlock |
|
- babyai-go-to-local |
|
- babyai-go-to-obj-door |
|
- babyai-go-to-obj |
|
- babyai-go-to-red-ball-grey |
|
- babyai-go-to-red-ball-no-dists |
|
- babyai-go-to-red-ball |
|
- babyai-go-to-red-blue-ball |
|
- babyai-go-to-seq |
|
- babyai-go-to |
|
- babyai-key-corridor |
|
- babyai-mini-boss-level |
|
- babyai-move-two-across-s8n9 |
|
- babyai-one-room-s8 |
|
- babyai-open-door |
|
- babyai-open-doors-order-n4 |
|
- babyai-open-red-door |
|
- babyai-open-two-doors |
|
- babyai-open |
|
- babyai-pickup-above |
|
- babyai-pickup-dist |
|
- babyai-pickup-loc |
|
- babyai-pickup |
|
- babyai-put-next-local |
|
- babyai-put-next |
|
- babyai-synth-loc |
|
- babyai-synth-seq |
|
- babyai-synth |
|
- babyai-unblock-pickup |
|
- babyai-unlock-local |
|
- babyai-unlock-pickup |
|
- babyai-unlock-to-unlock |
|
- babyai-unlock |
|
- metaworld-assembly |
|
- metaworld-basketball |
|
- metaworld-bin-picking |
|
- metaworld-box-close |
|
- metaworld-button-press-topdown-wall |
|
- metaworld-button-press-topdown |
|
- metaworld-button-press-wall |
|
- metaworld-button-press |
|
- metaworld-coffee-button |
|
- metaworld-coffee-pull |
|
- metaworld-coffee-push |
|
- metaworld-dial-turn |
|
- metaworld-disassemble |
|
- metaworld-door-close |
|
- metaworld-door-lock |
|
- metaworld-door-open |
|
- metaworld-door-unlock |
|
- metaworld-drawer-close |
|
- metaworld-drawer-open |
|
- metaworld-faucet-close |
|
- metaworld-faucet-open |
|
- metaworld-hammer |
|
- metaworld-hand-insert |
|
- metaworld-handle-press-side |
|
- metaworld-handle-press |
|
- metaworld-handle-pull-side |
|
- metaworld-handle-pull |
|
- metaworld-lever-pull |
|
- metaworld-peg-insert-side |
|
- metaworld-peg-unplug-side |
|
- metaworld-pick-out-of-hole |
|
- metaworld-pick-place-wall |
|
- metaworld-pick-place |
|
- metaworld-plate-slide-back-side |
|
- metaworld-plate-slide-back |
|
- metaworld-plate-slide-side |
|
- metaworld-plate-slide |
|
- metaworld-push-back |
|
- metaworld-push-wall |
|
- metaworld-push |
|
- metaworld-reach-wall |
|
- metaworld-reach |
|
- metaworld-shelf-place |
|
- metaworld-soccer |
|
- metaworld-stick-pull |
|
- metaworld-stick-push |
|
- metaworld-sweep-into |
|
- metaworld-sweep |
|
- metaworld-window-close |
|
- metaworld-window-open |
|
- mujoco-ant |
|
- mujoco-doublependulum |
|
- mujoco-halfcheetah |
|
- mujoco-hopper |
|
- mujoco-humanoid |
|
- mujoco-pendulum |
|
- mujoco-pusher |
|
- mujoco-reacher |
|
- mujoco-standup |
|
- mujoco-swimmer |
|
- mujoco-walker |
|
datasets: jat-project/jat-dataset |
|
pipeline_tag: reinforcement-learning |
|
model-index: |
|
- name: jat-project/jat |
|
results: |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Atari 57 |
|
type: atari |
|
metrics: |
|
- type: iqm_expert_normalized_total_reward |
|
value: 0.06 [0.06, 0.06] |
|
name: IQM expert normalized total reward |
|
- type: iqm_human_normalized_total_reward |
|
value: 0.17 [0.16, 0.17] |
|
name: IQM human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: BabyAI |
|
type: babyai |
|
metrics: |
|
- type: iqm_expert_normalized_total_reward |
|
value: 0.99 [0.99, 0.99] |
|
name: IQM expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: MetaWorld |
|
type: metaworld |
|
metrics: |
|
- type: iqm_expert_normalized_total_reward |
|
value: 0.68 [0.67, 0.69] |
|
name: IQM expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: MuJoCo |
|
type: mujoco |
|
metrics: |
|
- type: iqm_expert_normalized_total_reward |
|
value: 0.81 [0.80, 0.82] |
|
name: IQM expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Alien |
|
type: atari-alien |
|
metrics: |
|
- type: total_reward |
|
value: 1085.90 +/- 396.36 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.05 +/- 0.02 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.12 +/- 0.06 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Amidar |
|
type: atari-amidar |
|
metrics: |
|
- type: total_reward |
|
value: 41.26 +/- 28.57 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.02 +/- 0.01 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.02 +/- 0.02 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Assault |
|
type: atari-assault |
|
metrics: |
|
- type: total_reward |
|
value: 772.89 +/- 59.34 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.04 +/- 0.00 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 1.06 +/- 0.11 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Asterix |
|
type: atari-asterix |
|
metrics: |
|
- type: total_reward |
|
value: 778.50 +/- 428.97 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.16 +/- 0.12 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.07 +/- 0.05 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Asteroids |
|
type: atari-asteroids |
|
metrics: |
|
- type: total_reward |
|
value: 1423.60 +/- 538.79 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.00 +/- 0.00 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.02 +/- 0.01 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Atlantis |
|
type: atari-atlantis |
|
metrics: |
|
- type: total_reward |
|
value: 23541.00 +/- 10376.72 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.03 +/- 0.03 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.66 +/- 0.64 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Bank Heist |
|
type: atari-bankheist |
|
metrics: |
|
- type: total_reward |
|
value: 685.50 +/- 157.92 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.51 +/- 0.12 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.91 +/- 0.21 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Battle Zone |
|
type: atari-battlezone |
|
metrics: |
|
- type: total_reward |
|
value: 12950.00 +/- 4306.68 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.04 +/- 0.01 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.34 +/- 0.12 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Beam Rider |
|
type: atari-beamrider |
|
metrics: |
|
- type: total_reward |
|
value: 762.04 +/- 243.25 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.01 +/- 0.01 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.02 +/- 0.01 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Berzerk |
|
type: atari-berzerk |
|
metrics: |
|
- type: total_reward |
|
value: 523.90 +/- 161.95 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.01 +/- 0.00 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.16 +/- 0.06 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Bowling |
|
type: atari-bowling |
|
metrics: |
|
- type: total_reward |
|
value: 29.99 +/- 11.49 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.00 +/- 0.00 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.05 +/- 0.08 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Boxing |
|
type: atari-boxing |
|
metrics: |
|
- type: total_reward |
|
value: 87.00 +/- 22.57 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.89 +/- 0.23 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 7.24 +/- 1.88 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Breakout |
|
type: atari-breakout |
|
metrics: |
|
- type: total_reward |
|
value: 9.16 +/- 5.76 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.01 +/- 0.01 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.26 +/- 0.20 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Centipede |
|
type: atari-centipede |
|
metrics: |
|
- type: total_reward |
|
value: 4461.72 +/- 2188.80 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.25 +/- 0.23 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.24 +/- 0.22 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Chopper Command |
|
type: atari-choppercommand |
|
metrics: |
|
- type: total_reward |
|
value: 1497.00 +/- 723.11 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.01 +/- 0.01 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.10 +/- 0.11 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Crazy Climber |
|
type: atari-crazyclimber |
|
metrics: |
|
- type: total_reward |
|
value: 52850.00 +/- 31617.86 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.25 +/- 0.19 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 1.68 +/- 1.26 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Defender |
|
type: atari-defender |
|
metrics: |
|
- type: total_reward |
|
value: 10627.50 +/- 4473.21 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.02 +/- 0.01 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.49 +/- 0.28 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Demon Attack |
|
type: atari-demonattack |
|
metrics: |
|
- type: total_reward |
|
value: 315.10 +/- 279.01 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.00 +/- 0.00 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.09 +/- 0.15 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Double Dunk |
|
type: atari-doubledunk |
|
metrics: |
|
- type: total_reward |
|
value: 0.08 +/- 11.61 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.47 +/- 0.29 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.53 +/- 0.33 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Enduro |
|
type: atari-enduro |
|
metrics: |
|
- type: total_reward |
|
value: 111.49 +/- 27.36 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.05 +/- 0.01 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.13 +/- 0.03 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Fishing Derby |
|
type: atari-fishingderby |
|
metrics: |
|
- type: total_reward |
|
value: -55.21 +/- 19.35 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.37 +/- 0.20 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.28 +/- 0.15 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Freeway |
|
type: atari-freeway |
|
metrics: |
|
- type: total_reward |
|
value: 24.12 +/- 1.64 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.71 +/- 0.05 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.81 +/- 0.06 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Frostbite |
|
type: atari-frostbite |
|
metrics: |
|
- type: total_reward |
|
value: 617.30 +/- 686.11 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.04 +/- 0.05 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.13 +/- 0.16 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Gopher |
|
type: atari-gopher |
|
metrics: |
|
- type: total_reward |
|
value: 2947.20 +/- 1448.32 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.03 +/- 0.02 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 1.25 +/- 0.67 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Gravitar |
|
type: atari-gravitar |
|
metrics: |
|
- type: total_reward |
|
value: 1030.50 +/- 719.20 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.22 +/- 0.19 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.27 +/- 0.23 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: H.E.R.O. |
|
type: atari-hero |
|
metrics: |
|
- type: total_reward |
|
value: 6997.95 +/- 2562.51 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.14 +/- 0.06 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.20 +/- 0.09 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Ice Hockey |
|
type: atari-icehockey |
|
metrics: |
|
- type: total_reward |
|
value: -3.77 +/- 3.10 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.20 +/- 0.09 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.61 +/- 0.26 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: James Bond |
|
type: atari-jamesbond |
|
metrics: |
|
- type: total_reward |
|
value: 187.50 +/- 72.24 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.01 +/- 0.00 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.58 +/- 0.26 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Kangaroo |
|
type: atari-kangaroo |
|
metrics: |
|
- type: total_reward |
|
value: 124.00 +/- 156.92 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.14 +/- 0.30 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.02 +/- 0.05 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Krull |
|
type: atari-krull |
|
metrics: |
|
- type: total_reward |
|
value: 8933.00 +/- 1358.65 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.75 +/- 0.14 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 6.87 +/- 1.27 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Kung-Fu Master |
|
type: atari-kungfumaster |
|
metrics: |
|
- type: total_reward |
|
value: 100.00 +/- 142.13 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: -0.00 +/- 0.00 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: -0.01 +/- 0.01 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Montezuma's Revenge |
|
type: atari-montezumarevenge |
|
metrics: |
|
- type: total_reward |
|
value: 0.00 +/- 0.00 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.00 +/- 0.00 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.00 +/- 0.00 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Ms. Pacman |
|
type: atari-mspacman |
|
metrics: |
|
- type: total_reward |
|
value: 1516.30 +/- 376.72 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.18 +/- 0.06 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.18 +/- 0.06 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Name This Game |
|
type: atari-namethisgame |
|
metrics: |
|
- type: total_reward |
|
value: 3798.60 +/- 1361.64 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.07 +/- 0.07 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.26 +/- 0.24 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Phoenix |
|
type: atari-phoenix |
|
metrics: |
|
- type: total_reward |
|
value: 1267.50 +/- 1013.72 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.00 +/- 0.00 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.08 +/- 0.16 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: PitFall |
|
type: atari-pitfall |
|
metrics: |
|
- type: total_reward |
|
value: -287.36 +/- 492.82 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: -0.25 +/- 2.16 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: -0.01 +/- 0.07 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Pong |
|
type: atari-pong |
|
metrics: |
|
- type: total_reward |
|
value: -11.03 +/- 11.29 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.23 +/- 0.27 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.27 +/- 0.32 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Private Eye |
|
type: atari-privateeye |
|
metrics: |
|
- type: total_reward |
|
value: 96.00 +/- 19.60 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.95 +/- 0.26 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.00 +/- 0.00 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Q*Bert |
|
type: atari-qbert |
|
metrics: |
|
- type: total_reward |
|
value: 1701.75 +/- 1912.56 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.04 +/- 0.04 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.12 +/- 0.14 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: River Raid |
|
type: atari-riverraid |
|
metrics: |
|
- type: total_reward |
|
value: 2793.10 +/- 693.84 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.11 +/- 0.05 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.09 +/- 0.04 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Road Runner |
|
type: atari-roadrunner |
|
metrics: |
|
- type: total_reward |
|
value: 7699.00 +/- 3446.61 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.10 +/- 0.04 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.98 +/- 0.44 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Robotank |
|
type: atari-robotank |
|
metrics: |
|
- type: total_reward |
|
value: 16.36 +/- 5.24 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.18 +/- 0.07 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 1.46 +/- 0.54 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Seaquest |
|
type: atari-seaquest |
|
metrics: |
|
- type: total_reward |
|
value: 515.20 +/- 141.51 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.18 +/- 0.06 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.01 +/- 0.00 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Skiing |
|
type: atari-skiing |
|
metrics: |
|
- type: total_reward |
|
value: -29396.08 +/- 3289.80 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: -1.93 +/- 0.52 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: -0.96 +/- 0.26 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Solaris |
|
type: atari-solaris |
|
metrics: |
|
- type: total_reward |
|
value: 988.20 +/- 487.42 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: -2.11 +/- 4.15 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: -0.02 +/- 0.04 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Space Invaders |
|
type: atari-spaceinvaders |
|
metrics: |
|
- type: total_reward |
|
value: 339.50 +/- 164.05 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.01 +/- 0.01 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.13 +/- 0.11 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Star Gunner |
|
type: atari-stargunner |
|
metrics: |
|
- type: total_reward |
|
value: 978.00 +/- 638.37 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.00 +/- 0.00 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.03 +/- 0.07 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Surround |
|
type: atari-surround |
|
metrics: |
|
- type: total_reward |
|
value: -8.22 +/- 1.19 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.09 +/- 0.06 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.11 +/- 0.07 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Tennis |
|
type: atari-tennis |
|
metrics: |
|
- type: total_reward |
|
value: -22.38 +/- 2.22 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.04 +/- 0.06 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.04 +/- 0.07 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Time Pilot |
|
type: atari-timepilot |
|
metrics: |
|
- type: total_reward |
|
value: 9534.00 +/- 2577.76 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.09 +/- 0.04 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 3.59 +/- 1.55 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Tutankham |
|
type: atari-tutankham |
|
metrics: |
|
- type: total_reward |
|
value: 40.20 +/- 14.51 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.10 +/- 0.05 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.18 +/- 0.09 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Up and Down |
|
type: atari-upndown |
|
metrics: |
|
- type: total_reward |
|
value: 6072.00 +/- 2283.30 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.01 +/- 0.01 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.50 +/- 0.20 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Venture |
|
type: atari-venture |
|
metrics: |
|
- type: total_reward |
|
value: 0.00 +/- 0.00 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.00 +/- 0.00 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.00 +/- 0.00 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Video Pinball |
|
type: atari-videopinball |
|
metrics: |
|
- type: total_reward |
|
value: 7943.01 +/- 8351.21 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.02 +/- 0.02 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.45 +/- 0.47 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Wizard of Wor |
|
type: atari-wizardofwor |
|
metrics: |
|
- type: total_reward |
|
value: 1306.00 +/- 1139.81 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.02 +/- 0.02 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.18 +/- 0.27 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Yars Revenge |
|
type: atari-yarsrevenge |
|
metrics: |
|
- type: total_reward |
|
value: 8597.41 +/- 4291.81 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.02 +/- 0.02 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.11 +/- 0.08 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Zaxxon |
|
type: atari-zaxxon |
|
metrics: |
|
- type: total_reward |
|
value: 896.00 +/- 1172.68 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.01 +/- 0.02 |
|
name: Expert normalized total reward |
|
- type: human_normalized_total_reward |
|
value: 0.09 +/- 0.13 |
|
name: Human normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Action Obj Door |
|
type: babyai-action-obj-door |
|
metrics: |
|
- type: total_reward |
|
value: 0.95 +/- 0.13 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.94 +/- 0.22 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Blocked Unlock Pickup |
|
type: babyai-blocked-unlock-pickup |
|
metrics: |
|
- type: total_reward |
|
value: 0.95 +/- 0.01 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.00 +/- 0.01 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Boss Level No Unlock |
|
type: babyai-boss-level-no-unlock |
|
metrics: |
|
- type: total_reward |
|
value: 0.44 +/- 0.45 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.43 +/- 0.51 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Boss Level |
|
type: babyai-boss-level |
|
metrics: |
|
- type: total_reward |
|
value: 0.48 +/- 0.45 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.48 +/- 0.51 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Find Obj S5 |
|
type: babyai-find-obj-s5 |
|
metrics: |
|
- type: total_reward |
|
value: 0.95 +/- 0.03 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.00 +/- 0.04 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Go To Door |
|
type: babyai-go-to-door |
|
metrics: |
|
- type: total_reward |
|
value: 0.99 +/- 0.01 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.00 +/- 0.01 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Go To Imp Unlock |
|
type: babyai-go-to-imp-unlock |
|
metrics: |
|
- type: total_reward |
|
value: 0.50 +/- 0.44 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.56 +/- 0.59 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Go To Local |
|
type: babyai-go-to-local |
|
metrics: |
|
- type: total_reward |
|
value: 0.88 +/- 0.14 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.94 +/- 0.18 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Go To Obj Door |
|
type: babyai-go-to-obj-door |
|
metrics: |
|
- type: total_reward |
|
value: 0.98 +/- 0.04 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.97 +/- 0.08 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Go To Obj |
|
type: babyai-go-to-obj |
|
metrics: |
|
- type: total_reward |
|
value: 0.93 +/- 0.04 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.99 +/- 0.05 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Go To Red Ball Grey |
|
type: babyai-go-to-red-ball-grey |
|
metrics: |
|
- type: total_reward |
|
value: 0.91 +/- 0.06 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.99 +/- 0.08 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Go To Red Ball No Dists |
|
type: babyai-go-to-red-ball-no-dists |
|
metrics: |
|
- type: total_reward |
|
value: 0.93 +/- 0.03 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.00 +/- 0.04 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Go To Red Ball |
|
type: babyai-go-to-red-ball |
|
metrics: |
|
- type: total_reward |
|
value: 0.91 +/- 0.08 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.98 +/- 0.11 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Go To Red Blue Ball |
|
type: babyai-go-to-red-blue-ball |
|
metrics: |
|
- type: total_reward |
|
value: 0.88 +/- 0.11 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.96 +/- 0.13 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Go To Seq |
|
type: babyai-go-to-seq |
|
metrics: |
|
- type: total_reward |
|
value: 0.73 +/- 0.34 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.75 +/- 0.40 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Go To |
|
type: babyai-go-to |
|
metrics: |
|
- type: total_reward |
|
value: 0.80 +/- 0.27 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.85 +/- 0.35 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Key Corridor |
|
type: babyai-key-corridor |
|
metrics: |
|
- type: total_reward |
|
value: 0.88 +/- 0.10 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.97 +/- 0.11 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Mini Boss Level |
|
type: babyai-mini-boss-level |
|
metrics: |
|
- type: total_reward |
|
value: 0.69 +/- 0.35 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.76 +/- 0.43 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Move Two Across S8N9 |
|
type: babyai-move-two-across-s8n9 |
|
metrics: |
|
- type: total_reward |
|
value: 0.03 +/- 0.15 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.03 +/- 0.16 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: One Room S8 |
|
type: babyai-one-room-s8 |
|
metrics: |
|
- type: total_reward |
|
value: 0.92 +/- 0.03 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.00 +/- 0.04 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Open Door |
|
type: babyai-open-door |
|
metrics: |
|
- type: total_reward |
|
value: 0.99 +/- 0.00 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.00 +/- 0.01 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Open Doors Order N4 |
|
type: babyai-open-doors-order-n4 |
|
metrics: |
|
- type: total_reward |
|
value: 0.96 +/- 0.11 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.97 +/- 0.13 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Open Red Door |
|
type: babyai-open-red-door |
|
metrics: |
|
- type: total_reward |
|
value: 0.92 +/- 0.02 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.00 +/- 0.03 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Open Two Doors |
|
type: babyai-open-two-doors |
|
metrics: |
|
- type: total_reward |
|
value: 0.98 +/- 0.00 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.00 +/- 0.00 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Open |
|
type: babyai-open |
|
metrics: |
|
- type: total_reward |
|
value: 0.93 +/- 0.11 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.97 +/- 0.13 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Pickup Above |
|
type: babyai-pickup-above |
|
metrics: |
|
- type: total_reward |
|
value: 0.92 +/- 0.06 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.01 +/- 0.07 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Pickup Dist |
|
type: babyai-pickup-dist |
|
metrics: |
|
- type: total_reward |
|
value: 0.88 +/- 0.13 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.03 +/- 0.18 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Pickup Loc |
|
type: babyai-pickup-loc |
|
metrics: |
|
- type: total_reward |
|
value: 0.84 +/- 0.20 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.91 +/- 0.24 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Pickup |
|
type: babyai-pickup |
|
metrics: |
|
- type: total_reward |
|
value: 0.72 +/- 0.34 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.77 +/- 0.40 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Put Next Local |
|
type: babyai-put-next-local |
|
metrics: |
|
- type: total_reward |
|
value: 0.60 +/- 0.36 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.65 +/- 0.39 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Put Next S7N4 |
|
type: babyai-put-next |
|
metrics: |
|
- type: total_reward |
|
value: 0.82 +/- 0.26 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.86 +/- 0.27 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Synth Loc |
|
type: babyai-synth-loc |
|
metrics: |
|
- type: total_reward |
|
value: 0.82 +/- 0.31 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.85 +/- 0.38 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Synth Seq |
|
type: babyai-synth-seq |
|
metrics: |
|
- type: total_reward |
|
value: 0.57 +/- 0.44 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.57 +/- 0.50 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Synth |
|
type: babyai-synth |
|
metrics: |
|
- type: total_reward |
|
value: 0.68 +/- 0.39 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.69 +/- 0.47 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Unblock Pickup |
|
type: babyai-unblock-pickup |
|
metrics: |
|
- type: total_reward |
|
value: 0.76 +/- 0.33 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.82 +/- 0.39 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Unlock Local |
|
type: babyai-unlock-local |
|
metrics: |
|
- type: total_reward |
|
value: 0.98 +/- 0.01 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.00 +/- 0.01 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Unlock Pickup |
|
type: babyai-unlock-pickup |
|
metrics: |
|
- type: total_reward |
|
value: 0.76 +/- 0.03 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.01 +/- 0.04 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Unlock To Unlock |
|
type: babyai-unlock-to-unlock |
|
metrics: |
|
- type: total_reward |
|
value: 0.86 +/- 0.29 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.89 +/- 0.30 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Unlock |
|
type: babyai-unlock |
|
metrics: |
|
- type: total_reward |
|
value: 0.55 +/- 0.42 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.63 +/- 0.50 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Assembly |
|
type: metaworld-assembly |
|
metrics: |
|
- type: total_reward |
|
value: 238.32 +/- 32.98 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.96 +/- 0.16 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Basketball |
|
type: metaworld-basketball |
|
metrics: |
|
- type: total_reward |
|
value: 1.59 +/- 0.43 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: -0.00 +/- 0.00 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: BinPicking |
|
type: metaworld-bin-picking |
|
metrics: |
|
- type: total_reward |
|
value: 374.18 +/- 168.23 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.88 +/- 0.40 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Box Close |
|
type: metaworld-box-close |
|
metrics: |
|
- type: total_reward |
|
value: 510.10 +/- 117.47 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.99 +/- 0.27 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Button Press Topdown Wall |
|
type: metaworld-button-press-topdown-wall |
|
metrics: |
|
- type: total_reward |
|
value: 260.07 +/- 67.75 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.49 +/- 0.14 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Button Press Topdown |
|
type: metaworld-button-press-topdown |
|
metrics: |
|
- type: total_reward |
|
value: 265.16 +/- 77.93 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.51 +/- 0.17 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Button Press Wall |
|
type: metaworld-button-press-wall |
|
metrics: |
|
- type: total_reward |
|
value: 621.75 +/- 137.13 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.92 +/- 0.21 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Button Press |
|
type: metaworld-button-press |
|
metrics: |
|
- type: total_reward |
|
value: 556.75 +/- 198.85 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.86 +/- 0.33 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Coffee Button |
|
type: metaworld-coffee-button |
|
metrics: |
|
- type: total_reward |
|
value: 250.50 +/- 266.92 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.31 +/- 0.38 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Coffee Pull |
|
type: metaworld-coffee-pull |
|
metrics: |
|
- type: total_reward |
|
value: 55.13 +/- 96.96 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.20 +/- 0.38 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Coffee Push |
|
type: metaworld-coffee-push |
|
metrics: |
|
- type: total_reward |
|
value: 269.17 +/- 237.82 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.54 +/- 0.48 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Dial Turn |
|
type: metaworld-dial-turn |
|
metrics: |
|
- type: total_reward |
|
value: 738.22 +/- 168.43 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.93 +/- 0.22 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Disassemble |
|
type: metaworld-disassemble |
|
metrics: |
|
- type: total_reward |
|
value: 39.14 +/- 11.85 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: -0.47 +/- 4.70 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Door Close |
|
type: metaworld-door-close |
|
metrics: |
|
- type: total_reward |
|
value: 528.17 +/- 29.90 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.00 +/- 0.06 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Door Lock |
|
type: metaworld-door-lock |
|
metrics: |
|
- type: total_reward |
|
value: 676.51 +/- 192.68 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.81 +/- 0.28 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Door Open |
|
type: metaworld-door-open |
|
metrics: |
|
- type: total_reward |
|
value: 572.76 +/- 57.53 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.98 +/- 0.11 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Door Unlock |
|
type: metaworld-door-unlock |
|
metrics: |
|
- type: total_reward |
|
value: 654.94 +/- 260.64 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.79 +/- 0.37 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Drawer Close |
|
type: metaworld-drawer-close |
|
metrics: |
|
- type: total_reward |
|
value: 663.02 +/- 214.51 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.73 +/- 0.29 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Drawer Open |
|
type: metaworld-drawer-open |
|
metrics: |
|
- type: total_reward |
|
value: 489.07 +/- 21.28 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.99 +/- 0.06 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Faucet Close |
|
type: metaworld-faucet-close |
|
metrics: |
|
- type: total_reward |
|
value: 361.32 +/- 72.28 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.22 +/- 0.14 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Faucet Open |
|
type: metaworld-faucet-open |
|
metrics: |
|
- type: total_reward |
|
value: 637.86 +/- 134.50 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.85 +/- 0.29 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Hammer |
|
type: metaworld-hammer |
|
metrics: |
|
- type: total_reward |
|
value: 691.72 +/- 25.25 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.00 +/- 0.04 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Hand Insert |
|
type: metaworld-hand-insert |
|
metrics: |
|
- type: total_reward |
|
value: 719.57 +/- 99.26 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.97 +/- 0.13 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Handle Press Side |
|
type: metaworld-handle-press-side |
|
metrics: |
|
- type: total_reward |
|
value: 84.25 +/- 113.34 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.03 +/- 0.14 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Handle Press |
|
type: metaworld-handle-press |
|
metrics: |
|
- type: total_reward |
|
value: 731.94 +/- 261.90 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.84 +/- 0.34 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Handle Pull Side |
|
type: metaworld-handle-pull-side |
|
metrics: |
|
- type: total_reward |
|
value: 233.11 +/- 199.49 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.60 +/- 0.52 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Handle Pull |
|
type: metaworld-handle-pull |
|
metrics: |
|
- type: total_reward |
|
value: 501.29 +/- 209.45 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.74 +/- 0.32 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Lever Pull |
|
type: metaworld-lever-pull |
|
metrics: |
|
- type: total_reward |
|
value: 250.18 +/- 228.59 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.34 +/- 0.41 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Peg Insert Side |
|
type: metaworld-peg-insert-side |
|
metrics: |
|
- type: total_reward |
|
value: 288.02 +/- 157.87 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.91 +/- 0.50 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Peg Unplug Side |
|
type: metaworld-peg-unplug-side |
|
metrics: |
|
- type: total_reward |
|
value: 68.48 +/- 125.34 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.14 +/- 0.28 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Pick Out Of Hole |
|
type: metaworld-pick-out-of-hole |
|
metrics: |
|
- type: total_reward |
|
value: 2.08 +/- 0.05 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.00 +/- 0.00 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Pick Place Wall |
|
type: metaworld-pick-place-wall |
|
metrics: |
|
- type: total_reward |
|
value: 6.87 +/- 44.99 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.02 +/- 0.10 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Pick Place |
|
type: metaworld-pick-place |
|
metrics: |
|
- type: total_reward |
|
value: 264.18 +/- 195.69 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.63 +/- 0.47 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Plate Slide Back Side |
|
type: metaworld-plate-slide-back-side |
|
metrics: |
|
- type: total_reward |
|
value: 697.54 +/- 137.79 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.95 +/- 0.20 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Plate Slide Back |
|
type: metaworld-plate-slide-back |
|
metrics: |
|
- type: total_reward |
|
value: 196.80 +/- 1.73 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.24 +/- 0.00 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Plate Slide Side |
|
type: metaworld-plate-slide-side |
|
metrics: |
|
- type: total_reward |
|
value: 122.61 +/- 24.52 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.16 +/- 0.04 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Plate Slide |
|
type: metaworld-plate-slide |
|
metrics: |
|
- type: total_reward |
|
value: 497.42 +/- 168.74 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.93 +/- 0.37 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Push Back |
|
type: metaworld-push-back |
|
metrics: |
|
- type: total_reward |
|
value: 91.41 +/- 115.05 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.08 +/- 1.37 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Push Wall |
|
type: metaworld-push-wall |
|
metrics: |
|
- type: total_reward |
|
value: 116.49 +/- 208.05 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.15 +/- 0.28 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Push |
|
type: metaworld-push |
|
metrics: |
|
- type: total_reward |
|
value: 604.25 +/- 261.90 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.80 +/- 0.35 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Reach Wall |
|
type: metaworld-reach-wall |
|
metrics: |
|
- type: total_reward |
|
value: 634.57 +/- 231.40 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.81 +/- 0.38 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Reach |
|
type: metaworld-reach |
|
metrics: |
|
- type: total_reward |
|
value: 325.27 +/- 159.21 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.33 +/- 0.30 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Shelf Place |
|
type: metaworld-shelf-place |
|
metrics: |
|
- type: total_reward |
|
value: 124.60 +/- 112.83 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.52 +/- 0.47 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Soccer |
|
type: metaworld-soccer |
|
metrics: |
|
- type: total_reward |
|
value: 364.50 +/- 175.45 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.97 +/- 0.47 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Stick Pull |
|
type: metaworld-stick-pull |
|
metrics: |
|
- type: total_reward |
|
value: 398.64 +/- 205.60 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.76 +/- 0.39 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Stick Push |
|
type: metaworld-stick-push |
|
metrics: |
|
- type: total_reward |
|
value: 158.29 +/- 264.59 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.25 +/- 0.42 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Sweep Into |
|
type: metaworld-sweep-into |
|
metrics: |
|
- type: total_reward |
|
value: 775.30 +/- 119.00 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.97 +/- 0.15 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Sweep |
|
type: metaworld-sweep |
|
metrics: |
|
- type: total_reward |
|
value: 15.64 +/- 9.29 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.01 +/- 0.02 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Window Close |
|
type: metaworld-window-close |
|
metrics: |
|
- type: total_reward |
|
value: 423.33 +/- 203.92 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.69 +/- 0.38 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Window Open |
|
type: metaworld-window-open |
|
metrics: |
|
- type: total_reward |
|
value: 593.10 +/- 54.83 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.00 +/- 0.10 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Ant |
|
type: mujoco-ant |
|
metrics: |
|
- type: total_reward |
|
value: 5268.02 +/- 1495.39 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.90 +/- 0.25 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Inverted Double Pendulum |
|
type: mujoco-doublependulum |
|
metrics: |
|
- type: total_reward |
|
value: 4750.14 +/- 931.20 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.51 +/- 0.10 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Half Cheetah |
|
type: mujoco-halfcheetah |
|
metrics: |
|
- type: total_reward |
|
value: 6659.69 +/- 409.71 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.90 +/- 0.05 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Hopper |
|
type: mujoco-hopper |
|
metrics: |
|
- type: total_reward |
|
value: 1835.93 +/- 532.21 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.99 +/- 0.29 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Humanoid |
|
type: mujoco-humanoid |
|
metrics: |
|
- type: total_reward |
|
value: 697.44 +/- 108.06 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.09 +/- 0.02 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Inverted Pendulum |
|
type: mujoco-pendulum |
|
metrics: |
|
- type: total_reward |
|
value: 116.34 +/- 20.19 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.23 +/- 0.04 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Pusher |
|
type: mujoco-pusher |
|
metrics: |
|
- type: total_reward |
|
value: -26.33 +/- 6.32 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.99 +/- 0.05 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Reacher |
|
type: mujoco-reacher |
|
metrics: |
|
- type: total_reward |
|
value: -6.06 +/- 2.64 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.99 +/- 0.07 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Humanoid Standup |
|
type: mujoco-standup |
|
metrics: |
|
- type: total_reward |
|
value: 118125.15 +/- 24880.28 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 0.35 +/- 0.10 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Swimmer |
|
type: mujoco-swimmer |
|
metrics: |
|
- type: total_reward |
|
value: 93.26 +/- 3.78 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.01 +/- 0.04 |
|
name: Expert normalized total reward |
|
- task: |
|
type: reinforcement-learning |
|
name: Reinforcement Learning |
|
dataset: |
|
name: Walker 2d |
|
type: mujoco-walker |
|
metrics: |
|
- type: total_reward |
|
value: 4662.43 +/- 762.67 |
|
name: Total reward |
|
- type: expert_normalized_total_reward |
|
value: 1.01 +/- 0.16 |
|
name: Expert normalized total reward |
|
--- |
|
|
|
# Model Card for JAT |
|
|
|
This is a multi-modal and multi-task model. |
|
|
|
## Model Details |
|
|
|
### Model Description |
|
|
|
- **Developed by:** The JAT Team |
|
- **License:** Apache 2.0 |
|
|
|
### Model Sources |
|
|
|
- **Repository:** <https://github.com/huggingface/jat> |
|
- **Paper:** Coming soon |
|
- **Demo:** Coming soon |
|
|
|
## Training |
|
|
|
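The model was trained on the following tasks, listed by benchmark suite in this order: Atari (Alien to Zaxxon), BabyAI (Action Obj Door to Unlock), Meta-World (Assembly to Window Open), and MuJoCo (Ant to Walker 2d): |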
|
|
|
- Alien |
|
- Amidar |
|
- Assault |
|
- Asterix |
|
- Asteroids |
|
- Atlantis |
|
- Bank Heist |
|
- Battle Zone |
|
- Beam Rider |
|
- Berzerk |
|
- Bowling |
|
- Boxing |
|
- Breakout |
|
- Centipede |
|
- Chopper Command |
|
- Crazy Climber |
|
- Defender |
|
- Demon Attack |
|
- Double Dunk |
|
- Enduro |
|
- Fishing Derby |
|
- Freeway |
|
- Frostbite |
|
- Gopher |
|
- Gravitar |
|
- H.E.R.O. |
|
- Ice Hockey |
|
- James Bond |
|
- Kangaroo |
|
- Krull |
|
- Kung-Fu Master |
|
- Montezuma's Revenge |
|
- Ms. Pacman |
|
- Name This Game |
|
- Phoenix |
|
- Pitfall |
|
- Pong |
|
- Private Eye |
|
- Q*Bert |
|
- River Raid |
|
- Road Runner |
|
- Robotank |
|
- Seaquest |
|
- Skiing |
|
- Solaris |
|
- Space Invaders |
|
- Star Gunner |
|
- Surround |
|
- Tennis |
|
- Time Pilot |
|
- Tutankham |
|
- Up and Down |
|
- Venture |
|
- Video Pinball |
|
- Wizard of Wor |
|
- Yars Revenge |
|
- Zaxxon |
|
- Action Obj Door |
|
- Blocked Unlock Pickup |
|
- Boss Level No Unlock |
|
- Boss Level |
|
- Find Obj S5 |
|
- Go To Door |
|
- Go To Imp Unlock |
|
- Go To Local |
|
- Go To Obj Door |
|
- Go To Obj |
|
- Go To Red Ball Grey |
|
- Go To Red Ball No Dists |
|
- Go To Red Ball |
|
- Go To Red Blue Ball |
|
- Go To Seq |
|
- Go To |
|
- Key Corridor |
|
- Mini Boss Level |
|
- Move Two Across S8N9 |
|
- One Room S8 |
|
- Open Door |
|
- Open Doors Order N4 |
|
- Open Red Door |
|
- Open Two Doors |
|
- Open |
|
- Pickup Above |
|
- Pickup Dist |
|
- Pickup Loc |
|
- Pickup |
|
- Put Next Local |
|
- Put Next S7N4 |
|
- Synth Loc |
|
- Synth Seq |
|
- Synth |
|
- Unblock Pickup |
|
- Unlock Local |
|
- Unlock Pickup |
|
- Unlock To Unlock |
|
- Unlock |
|
- Assembly |
|
- Basketball |
|
- Bin Picking |
|
- Box Close |
|
- Button Press Topdown Wall |
|
- Button Press Topdown |
|
- Button Press Wall |
|
- Button Press |
|
- Coffee Button |
|
- Coffee Pull |
|
- Coffee Push |
|
- Dial Turn |
|
- Disassemble |
|
- Door Close |
|
- Door Lock |
|
- Door Open |
|
- Door Unlock |
|
- Drawer Close |
|
- Drawer Open |
|
- Faucet Close |
|
- Faucet Open |
|
- Hammer |
|
- Hand Insert |
|
- Handle Press Side |
|
- Handle Press |
|
- Handle Pull Side |
|
- Handle Pull |
|
- Lever Pull |
|
- Peg Insert Side |
|
- Peg Unplug Side |
|
- Pick Out Of Hole |
|
- Pick Place Wall |
|
- Pick Place |
|
- Plate Slide Back Side |
|
- Plate Slide Back |
|
- Plate Slide Side |
|
- Plate Slide |
|
- Push Back |
|
- Push Wall |
|
- Push |
|
- Reach Wall |
|
- Reach |
|
- Shelf Place |
|
- Soccer |
|
- Stick Pull |
|
- Stick Push |
|
- Sweep Into |
|
- Sweep |
|
- Window Close |
|
- Window Open |
|
- Ant |
|
- Inverted Double Pendulum |
|
- Half Cheetah |
|
- Hopper |
|
- Humanoid |
|
- Inverted Pendulum |
|
- Pusher |
|
- Reacher |
|
- Humanoid Standup |
|
- Swimmer |
|
- Walker 2d |
|
|
|
## How to Get Started with the Model |
|
|
|
Use the code below to get started with the model. |
|
|
|
```python |
|
from transformers import AutoModelForCausalLM |
|
|
|
model = AutoModelForCausalLM.from_pretrained("jat-project/jat") |
|
``` |
|
|