jat / README.md
qgallouedec's picture
qgallouedec HF staff
Upload model card
a788d9c verified
|
raw
history blame
69.3 kB
metadata
# Tags for this model card: the task tag first, then one tag per
# environment, grouped by suite prefix. The environment tags use the same
# identifiers as the `dataset.type` fields under `model-index`
# (e.g. `atari-alien`).
tags:
  - reinforcement-learning
  # Atari environments (57 entries, `atari-` prefix).
  - atari-alien
  - atari-amidar
  - atari-assault
  - atari-asterix
  - atari-asteroids
  - atari-atlantis
  - atari-bankheist
  - atari-battlezone
  - atari-beamrider
  - atari-berzerk
  - atari-bowling
  - atari-boxing
  - atari-breakout
  - atari-centipede
  - atari-choppercommand
  - atari-crazyclimber
  - atari-defender
  - atari-demonattack
  - atari-doubledunk
  - atari-enduro
  - atari-fishingderby
  - atari-freeway
  - atari-frostbite
  - atari-gopher
  - atari-gravitar
  - atari-hero
  - atari-icehockey
  - atari-jamesbond
  - atari-kangaroo
  - atari-krull
  - atari-kungfumaster
  - atari-montezumarevenge
  - atari-mspacman
  - atari-namethisgame
  - atari-phoenix
  - atari-pitfall
  - atari-pong
  - atari-privateeye
  - atari-qbert
  - atari-riverraid
  - atari-roadrunner
  - atari-robotank
  - atari-seaquest
  - atari-skiing
  - atari-solaris
  - atari-spaceinvaders
  - atari-stargunner
  - atari-surround
  - atari-tennis
  - atari-timepilot
  - atari-tutankham
  - atari-upndown
  - atari-venture
  - atari-videopinball
  - atari-wizardofwor
  - atari-yarsrevenge
  - atari-zaxxon
  # BabyAI environments (39 entries, `babyai-` prefix).
  - babyai-action-obj-door
  - babyai-blocked-unlock-pickup
  - babyai-boss-level-no-unlock
  - babyai-boss-level
  - babyai-find-obj-s5
  - babyai-go-to-door
  - babyai-go-to-imp-unlock
  - babyai-go-to-local
  - babyai-go-to-obj-door
  - babyai-go-to-obj
  - babyai-go-to-red-ball-grey
  - babyai-go-to-red-ball-no-dists
  - babyai-go-to-red-ball
  - babyai-go-to-red-blue-ball
  - babyai-go-to-seq
  - babyai-go-to
  - babyai-key-corridor
  - babyai-mini-boss-level
  - babyai-move-two-across-s8n9
  - babyai-one-room-s8
  - babyai-open-door
  - babyai-open-doors-order-n4
  - babyai-open-red-door
  - babyai-open-two-doors
  - babyai-open
  - babyai-pickup-above
  - babyai-pickup-dist
  - babyai-pickup-loc
  - babyai-pickup
  - babyai-put-next-local
  - babyai-put-next
  - babyai-synth-loc
  - babyai-synth-seq
  - babyai-synth
  - babyai-unblock-pickup
  - babyai-unlock-local
  - babyai-unlock-pickup
  - babyai-unlock-to-unlock
  - babyai-unlock
  # MetaWorld environments (50 entries, `metaworld-` prefix).
  - metaworld-assembly
  - metaworld-basketball
  - metaworld-bin-picking
  - metaworld-box-close
  - metaworld-button-press-topdown-wall
  - metaworld-button-press-topdown
  - metaworld-button-press-wall
  - metaworld-button-press
  - metaworld-coffee-button
  - metaworld-coffee-pull
  - metaworld-coffee-push
  - metaworld-dial-turn
  - metaworld-disassemble
  - metaworld-door-close
  - metaworld-door-lock
  - metaworld-door-open
  - metaworld-door-unlock
  - metaworld-drawer-close
  - metaworld-drawer-open
  - metaworld-faucet-close
  - metaworld-faucet-open
  - metaworld-hammer
  - metaworld-hand-insert
  - metaworld-handle-press-side
  - metaworld-handle-press
  - metaworld-handle-pull-side
  - metaworld-handle-pull
  - metaworld-lever-pull
  - metaworld-peg-insert-side
  - metaworld-peg-unplug-side
  - metaworld-pick-out-of-hole
  - metaworld-pick-place-wall
  - metaworld-pick-place
  - metaworld-plate-slide-back-side
  - metaworld-plate-slide-back
  - metaworld-plate-slide-side
  - metaworld-plate-slide
  - metaworld-push-back
  - metaworld-push-wall
  - metaworld-push
  - metaworld-reach-wall
  - metaworld-reach
  - metaworld-shelf-place
  - metaworld-soccer
  - metaworld-stick-pull
  - metaworld-stick-push
  - metaworld-sweep-into
  - metaworld-sweep
  - metaworld-window-close
  - metaworld-window-open
  # MuJoCo environments (11 entries, `mujoco-` prefix).
  - mujoco-ant
  - mujoco-doublependulum
  - mujoco-halfcheetah
  - mujoco-hopper
  - mujoco-humanoid
  - mujoco-pendulum
  - mujoco-pusher
  - mujoco-reacher
  - mujoco-standup
  - mujoco-swimmer
  - mujoco-walker
# Dataset reference for this model, written as a single plain string.
# NOTE(review): presumably a Hub dataset repo id; the Hub docs show this
# field as a list — confirm the scalar form is accepted by the consumer.
datasets: jat-project/jat-dataset
# Task category; same value as the first entry under `tags`.
pipeline_tag: reinforcement-learning
model-index:
  - name: jat-project/jat
    results:
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Atari 57
          type: atari
        metrics:
          - type: iqm_expert_normalized_total_reward
            value: 0.06 [0.06, 0.06]
            name: IQM expert normalized total reward
          - type: iqm_human_normalized_total_reward
            value: 0.17 [0.16, 0.17]
            name: IQM human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: BabyAI
          type: babyai
        metrics:
          - type: iqm_expert_normalized_total_reward
            value: 0.99 [0.99, 0.99]
            name: IQM expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: MetaWorld
          type: metaworld
        metrics:
          - type: iqm_expert_normalized_total_reward
            value: 0.68 [0.67, 0.69]
            name: IQM expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: MuJoCo
          type: mujoco
        metrics:
          - type: iqm_expert_normalized_total_reward
            value: 0.81 [0.80, 0.82]
            name: IQM expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Alien
          type: atari-alien
        metrics:
          - type: total_reward
            value: 1085.90 +/- 396.36
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.05 +/- 0.02
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.12 +/- 0.06
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Amidar
          type: atari-amidar
        metrics:
          - type: total_reward
            value: 41.26 +/- 28.57
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.02 +/- 0.01
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.02 +/- 0.02
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Assault
          type: atari-assault
        metrics:
          - type: total_reward
            value: 772.89 +/- 59.34
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.04 +/- 0.00
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 1.06 +/- 0.11
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Asterix
          type: atari-asterix
        metrics:
          - type: total_reward
            value: 778.50 +/- 428.97
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.16 +/- 0.12
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.07 +/- 0.05
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Asteroids
          type: atari-asteroids
        metrics:
          - type: total_reward
            value: 1423.60 +/- 538.79
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.00 +/- 0.00
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.02 +/- 0.01
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Atlantis
          type: atari-atlantis
        metrics:
          - type: total_reward
            value: 23541.00 +/- 10376.72
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.03 +/- 0.03
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.66 +/- 0.64
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Bank Heist
          type: atari-bankheist
        metrics:
          - type: total_reward
            value: 685.50 +/- 157.92
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.51 +/- 0.12
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.91 +/- 0.21
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Battle Zone
          type: atari-battlezone
        metrics:
          - type: total_reward
            value: 12950.00 +/- 4306.68
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.04 +/- 0.01
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.34 +/- 0.12
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Beam Rider
          type: atari-beamrider
        metrics:
          - type: total_reward
            value: 762.04 +/- 243.25
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.01 +/- 0.01
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.02 +/- 0.01
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Berzerk
          type: atari-berzerk
        metrics:
          - type: total_reward
            value: 523.90 +/- 161.95
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.01 +/- 0.00
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.16 +/- 0.06
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Bowling
          type: atari-bowling
        metrics:
          - type: total_reward
            value: 29.99 +/- 11.49
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.00
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.05 +/- 0.08
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Boxing
          type: atari-boxing
        metrics:
          - type: total_reward
            value: 87.00 +/- 22.57
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.89 +/- 0.23
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 7.24 +/- 1.88
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Breakout
          type: atari-breakout
        metrics:
          - type: total_reward
            value: 9.16 +/- 5.76
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.01 +/- 0.01
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.26 +/- 0.20
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Centipede
          type: atari-centipede
        metrics:
          - type: total_reward
            value: 4461.72 +/- 2188.80
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.25 +/- 0.23
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.24 +/- 0.22
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Chopper Command
          type: atari-choppercommand
        metrics:
          - type: total_reward
            value: 1497.00 +/- 723.11
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.01 +/- 0.01
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.10 +/- 0.11
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Crazy Climber
          type: atari-crazyclimber
        metrics:
          - type: total_reward
            value: 52850.00 +/- 31617.86
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.25 +/- 0.19
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 1.68 +/- 1.26
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Defender
          type: atari-defender
        metrics:
          - type: total_reward
            value: 10627.50 +/- 4473.21
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.02 +/- 0.01
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.49 +/- 0.28
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Demon Attack
          type: atari-demonattack
        metrics:
          - type: total_reward
            value: 315.10 +/- 279.01
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.00 +/- 0.00
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.09 +/- 0.15
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Double Dunk
          type: atari-doubledunk
        metrics:
          - type: total_reward
            value: 0.08 +/- 11.61
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.47 +/- 0.29
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.53 +/- 0.33
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Enduro
          type: atari-enduro
        metrics:
          - type: total_reward
            value: 111.49 +/- 27.36
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.05 +/- 0.01
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.13 +/- 0.03
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Fishing Derby
          type: atari-fishingderby
        metrics:
          - type: total_reward
            value: '-55.21 +/- 19.35'
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.37 +/- 0.20
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.28 +/- 0.15
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Freeway
          type: atari-freeway
        metrics:
          - type: total_reward
            value: 24.12 +/- 1.64
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.71 +/- 0.05
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.81 +/- 0.06
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Frostbite
          type: atari-frostbite
        metrics:
          - type: total_reward
            value: 617.30 +/- 686.11
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.04 +/- 0.05
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.13 +/- 0.16
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Gopher
          type: atari-gopher
        metrics:
          - type: total_reward
            value: 2947.20 +/- 1448.32
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.03 +/- 0.02
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 1.25 +/- 0.67
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Gravitar
          type: atari-gravitar
        metrics:
          - type: total_reward
            value: 1030.50 +/- 719.20
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.22 +/- 0.19
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.27 +/- 0.23
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: H.E.R.O.
          type: atari-hero
        metrics:
          - type: total_reward
            value: 6997.95 +/- 2562.51
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.14 +/- 0.06
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.20 +/- 0.09
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Ice Hockey
          type: atari-icehockey
        metrics:
          - type: total_reward
            value: '-3.77 +/- 3.10'
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.20 +/- 0.09
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.61 +/- 0.26
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: James Bond
          type: atari-jamesbond
        metrics:
          - type: total_reward
            value: 187.50 +/- 72.24
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.01 +/- 0.00
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.58 +/- 0.26
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Kangaroo
          type: atari-kangaroo
        metrics:
          - type: total_reward
            value: 124.00 +/- 156.92
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.14 +/- 0.30
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.02 +/- 0.05
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Krull
          type: atari-krull
        metrics:
          - type: total_reward
            value: 8933.00 +/- 1358.65
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.75 +/- 0.14
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 6.87 +/- 1.27
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Kung-Fu Master
          type: atari-kungfumaster
        metrics:
          - type: total_reward
            value: 100.00 +/- 142.13
            name: Total reward
          - type: expert_normalized_total_reward
            value: '-0.00 +/- 0.00'
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: '-0.01 +/- 0.01'
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Montezuma's Revenge
          type: atari-montezumarevenge
        metrics:
          - type: total_reward
            value: 0.00 +/- 0.00
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.00 +/- 0.00
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.00 +/- 0.00
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Ms. Pacman
          type: atari-mspacman
        metrics:
          - type: total_reward
            value: 1516.30 +/- 376.72
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.18 +/- 0.06
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.18 +/- 0.06
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Name This Game
          type: atari-namethisgame
        metrics:
          - type: total_reward
            value: 3798.60 +/- 1361.64
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.07 +/- 0.07
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.26 +/- 0.24
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Phoenix
          type: atari-phoenix
        metrics:
          - type: total_reward
            value: 1267.50 +/- 1013.72
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.00 +/- 0.00
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.08 +/- 0.16
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: PitFall
          type: atari-pitfall
        metrics:
          - type: total_reward
            value: '-287.36 +/- 492.82'
            name: Total reward
          - type: expert_normalized_total_reward
            value: '-0.25 +/- 2.16'
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: '-0.01 +/- 0.07'
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Pong
          type: atari-pong
        metrics:
          - type: total_reward
            value: '-11.03 +/- 11.29'
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.23 +/- 0.27
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.27 +/- 0.32
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Private Eye
          type: atari-privateeye
        metrics:
          - type: total_reward
            value: 96.00 +/- 19.60
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.95 +/- 0.26
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.00 +/- 0.00
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Q*Bert
          type: atari-qbert
        metrics:
          - type: total_reward
            value: 1701.75 +/- 1912.56
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.04 +/- 0.04
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.12 +/- 0.14
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: River Raid
          type: atari-riverraid
        metrics:
          - type: total_reward
            value: 2793.10 +/- 693.84
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.11 +/- 0.05
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.09 +/- 0.04
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Road Runner
          type: atari-roadrunner
        metrics:
          - type: total_reward
            value: 7699.00 +/- 3446.61
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.10 +/- 0.04
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.98 +/- 0.44
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Robotank
          type: atari-robotank
        metrics:
          - type: total_reward
            value: 16.36 +/- 5.24
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.18 +/- 0.07
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 1.46 +/- 0.54
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Seaquest
          type: atari-seaquest
        metrics:
          - type: total_reward
            value: 515.20 +/- 141.51
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.18 +/- 0.06
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.01 +/- 0.00
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Skiing
          type: atari-skiing
        metrics:
          - type: total_reward
            value: '-29396.08 +/- 3289.80'
            name: Total reward
          - type: expert_normalized_total_reward
            value: '-1.93 +/- 0.52'
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: '-0.96 +/- 0.26'
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Solaris
          type: atari-solaris
        metrics:
          - type: total_reward
            value: 988.20 +/- 487.42
            name: Total reward
          - type: expert_normalized_total_reward
            value: '-2.11 +/- 4.15'
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: '-0.02 +/- 0.04'
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Space Invaders
          type: atari-spaceinvaders
        metrics:
          - type: total_reward
            value: 339.50 +/- 164.05
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.01 +/- 0.01
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.13 +/- 0.11
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Star Gunner
          type: atari-stargunner
        metrics:
          - type: total_reward
            value: 978.00 +/- 638.37
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.00 +/- 0.00
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.03 +/- 0.07
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Surround
          type: atari-surround
        metrics:
          - type: total_reward
            value: '-8.22 +/- 1.19'
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.09 +/- 0.06
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.11 +/- 0.07
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Tennis
          type: atari-tennis
        metrics:
          - type: total_reward
            value: '-22.38 +/- 2.22'
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.04 +/- 0.06
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.04 +/- 0.07
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Time Pilot
          type: atari-timepilot
        metrics:
          - type: total_reward
            value: 9534.00 +/- 2577.76
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.09 +/- 0.04
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 3.59 +/- 1.55
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Tutankham
          type: atari-tutankham
        metrics:
          - type: total_reward
            value: 40.20 +/- 14.51
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.10 +/- 0.05
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.18 +/- 0.09
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Up and Down
          type: atari-upndown
        metrics:
          - type: total_reward
            value: 6072.00 +/- 2283.30
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.01 +/- 0.01
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.50 +/- 0.20
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Venture
          type: atari-venture
        metrics:
          - type: total_reward
            value: 0.00 +/- 0.00
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.00
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.00 +/- 0.00
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Video Pinball
          type: atari-videopinball
        metrics:
          - type: total_reward
            value: 7943.01 +/- 8351.21
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.02 +/- 0.02
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.45 +/- 0.47
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Wizard of Wor
          type: atari-wizardofwor
        metrics:
          - type: total_reward
            value: 1306.00 +/- 1139.81
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.02 +/- 0.02
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.18 +/- 0.27
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Yars Revenge
          type: atari-yarsrevenge
        metrics:
          - type: total_reward
            value: 8597.41 +/- 4291.81
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.02 +/- 0.02
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.11 +/- 0.08
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Zaxxon
          type: atari-zaxxon
        metrics:
          - type: total_reward
            value: 896.00 +/- 1172.68
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.01 +/- 0.02
            name: Expert normalized total reward
          - type: human_normalized_total_reward
            value: 0.09 +/- 0.13
            name: Human normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Action Obj Door
          type: babyai-action-obj-door
        metrics:
          - type: total_reward
            value: 0.95 +/- 0.13
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.94 +/- 0.22
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Blocked Unlock Pickup
          type: babyai-blocked-unlock-pickup
        metrics:
          - type: total_reward
            value: 0.95 +/- 0.01
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.01
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Boss Level No Unlock
          type: babyai-boss-level-no-unlock
        metrics:
          - type: total_reward
            value: 0.44 +/- 0.45
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.43 +/- 0.51
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Boss Level
          type: babyai-boss-level
        metrics:
          - type: total_reward
            value: 0.48 +/- 0.45
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.48 +/- 0.51
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Find Obj S5
          type: babyai-find-obj-s5
        metrics:
          - type: total_reward
            value: 0.95 +/- 0.03
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.04
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Door
          type: babyai-go-to-door
        metrics:
          - type: total_reward
            value: 0.99 +/- 0.01
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.01
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Imp Unlock
          type: babyai-go-to-imp-unlock
        metrics:
          - type: total_reward
            value: 0.50 +/- 0.44
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.56 +/- 0.59
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Local
          type: babyai-go-to-local
        metrics:
          - type: total_reward
            value: 0.88 +/- 0.14
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.94 +/- 0.18
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Obj Door
          type: babyai-go-to-obj-door
        metrics:
          - type: total_reward
            value: 0.98 +/- 0.04
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.97 +/- 0.08
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Obj
          type: babyai-go-to-obj
        metrics:
          - type: total_reward
            value: 0.93 +/- 0.04
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.99 +/- 0.05
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Red Ball Grey
          type: babyai-go-to-red-ball-grey
        metrics:
          - type: total_reward
            value: 0.91 +/- 0.06
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.99 +/- 0.08
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Red Ball No Dists
          type: babyai-go-to-red-ball-no-dists
        metrics:
          - type: total_reward
            value: 0.93 +/- 0.03
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.04
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Red Ball
          type: babyai-go-to-red-ball
        metrics:
          - type: total_reward
            value: 0.91 +/- 0.08
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.98 +/- 0.11
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Red Blue Ball
          type: babyai-go-to-red-blue-ball
        metrics:
          - type: total_reward
            value: 0.88 +/- 0.11
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.96 +/- 0.13
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To Seq
          type: babyai-go-to-seq
        metrics:
          - type: total_reward
            value: 0.73 +/- 0.34
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.75 +/- 0.40
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Go To
          type: babyai-go-to
        metrics:
          - type: total_reward
            value: 0.80 +/- 0.27
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.85 +/- 0.35
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Key Corridor
          type: babyai-key-corridor
        metrics:
          - type: total_reward
            value: 0.88 +/- 0.10
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.97 +/- 0.11
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Mini Boss Level
          type: babyai-mini-boss-level
        metrics:
          - type: total_reward
            value: 0.69 +/- 0.35
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.76 +/- 0.43
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Move Two Across S8N9
          type: babyai-move-two-across-s8n9
        metrics:
          - type: total_reward
            value: 0.03 +/- 0.15
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.03 +/- 0.16
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: One Room S8
          type: babyai-one-room-s8
        metrics:
          - type: total_reward
            value: 0.92 +/- 0.03
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.04
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Open Door
          type: babyai-open-door
        metrics:
          - type: total_reward
            value: 0.99 +/- 0.00
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.01
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Open Doors Order N4
          type: babyai-open-doors-order-n4
        metrics:
          - type: total_reward
            value: 0.96 +/- 0.11
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.97 +/- 0.13
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Open Red Door
          type: babyai-open-red-door
        metrics:
          - type: total_reward
            value: 0.92 +/- 0.02
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.03
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Open Two Doors
          type: babyai-open-two-doors
        metrics:
          - type: total_reward
            value: 0.98 +/- 0.00
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.00
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Open
          type: babyai-open
        metrics:
          - type: total_reward
            value: 0.93 +/- 0.11
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.97 +/- 0.13
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Pickup Above
          type: babyai-pickup-above
        metrics:
          - type: total_reward
            value: 0.92 +/- 0.06
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.01 +/- 0.07
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Pickup Dist
          type: babyai-pickup-dist
        metrics:
          - type: total_reward
            value: 0.88 +/- 0.13
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.03 +/- 0.18
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Pickup Loc
          type: babyai-pickup-loc
        metrics:
          - type: total_reward
            value: 0.84 +/- 0.20
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.91 +/- 0.24
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Pickup
          type: babyai-pickup
        metrics:
          - type: total_reward
            value: 0.72 +/- 0.34
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.77 +/- 0.40
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Put Next Local
          type: babyai-put-next-local
        metrics:
          - type: total_reward
            value: 0.60 +/- 0.36
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.65 +/- 0.39
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Put Next S7N4
          type: babyai-put-next
        metrics:
          - type: total_reward
            value: 0.82 +/- 0.26
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.86 +/- 0.27
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Synth Loc
          type: babyai-synth-loc
        metrics:
          - type: total_reward
            value: 0.82 +/- 0.31
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.85 +/- 0.38
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Synth Seq
          type: babyai-synth-seq
        metrics:
          - type: total_reward
            value: 0.57 +/- 0.44
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.57 +/- 0.50
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Synth
          type: babyai-synth
        metrics:
          - type: total_reward
            value: 0.68 +/- 0.39
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.69 +/- 0.47
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Unblock Pickup
          type: babyai-unblock-pickup
        metrics:
          - type: total_reward
            value: 0.76 +/- 0.33
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.82 +/- 0.39
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Unlock Local
          type: babyai-unlock-local
        metrics:
          - type: total_reward
            value: 0.98 +/- 0.01
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.01
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Unlock Pickup
          type: babyai-unlock-pickup
        metrics:
          - type: total_reward
            value: 0.76 +/- 0.03
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.01 +/- 0.04
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Unlock To Unlock
          type: babyai-unlock-to-unlock
        metrics:
          - type: total_reward
            value: 0.86 +/- 0.29
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.89 +/- 0.30
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Unlock
          type: babyai-unlock
        metrics:
          - type: total_reward
            value: 0.55 +/- 0.42
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.63 +/- 0.50
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Assembly
          type: metaworld-assembly
        metrics:
          - type: total_reward
            value: 238.32 +/- 32.98
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.96 +/- 0.16
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Basketball
          type: metaworld-basketball
        metrics:
          - type: total_reward
            value: 1.59 +/- 0.43
            name: Total reward
          - type: expert_normalized_total_reward
            value: '-0.00 +/- 0.00'
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Bin Picking
          type: metaworld-bin-picking
        metrics:
          - type: total_reward
            value: 374.18 +/- 168.23
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.88 +/- 0.40
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Box Close
          type: metaworld-box-close
        metrics:
          - type: total_reward
            value: 510.10 +/- 117.47
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.99 +/- 0.27
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Button Press Topdown Wall
          type: metaworld-button-press-topdown-wall
        metrics:
          - type: total_reward
            value: 260.07 +/- 67.75
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.49 +/- 0.14
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Button Press Topdown
          type: metaworld-button-press-topdown
        metrics:
          - type: total_reward
            value: 265.16 +/- 77.93
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.51 +/- 0.17
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Button Press Wall
          type: metaworld-button-press-wall
        metrics:
          - type: total_reward
            value: 621.75 +/- 137.13
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.92 +/- 0.21
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Button Press
          type: metaworld-button-press
        metrics:
          - type: total_reward
            value: 556.75 +/- 198.85
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.86 +/- 0.33
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Coffee Button
          type: metaworld-coffee-button
        metrics:
          - type: total_reward
            value: 250.50 +/- 266.92
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.31 +/- 0.38
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Coffee Pull
          type: metaworld-coffee-pull
        metrics:
          - type: total_reward
            value: 55.13 +/- 96.96
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.20 +/- 0.38
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Coffee Push
          type: metaworld-coffee-push
        metrics:
          - type: total_reward
            value: 269.17 +/- 237.82
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.54 +/- 0.48
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Dial Turn
          type: metaworld-dial-turn
        metrics:
          - type: total_reward
            value: 738.22 +/- 168.43
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.93 +/- 0.22
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Disassemble
          type: metaworld-disassemble
        metrics:
          - type: total_reward
            value: 39.14 +/- 11.85
            name: Total reward
          - type: expert_normalized_total_reward
            value: '-0.47 +/- 4.70'
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Door Close
          type: metaworld-door-close
        metrics:
          - type: total_reward
            value: 528.17 +/- 29.90
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.06
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Door Lock
          type: metaworld-door-lock
        metrics:
          - type: total_reward
            value: 676.51 +/- 192.68
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.81 +/- 0.28
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Door Open
          type: metaworld-door-open
        metrics:
          - type: total_reward
            value: 572.76 +/- 57.53
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.98 +/- 0.11
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Door Unlock
          type: metaworld-door-unlock
        metrics:
          - type: total_reward
            value: 654.94 +/- 260.64
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.79 +/- 0.37
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Drawer Close
          type: metaworld-drawer-close
        metrics:
          - type: total_reward
            value: 663.02 +/- 214.51
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.73 +/- 0.29
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Drawer Open
          type: metaworld-drawer-open
        metrics:
          - type: total_reward
            value: 489.07 +/- 21.28
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.99 +/- 0.06
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Faucet Close
          type: metaworld-faucet-close
        metrics:
          - type: total_reward
            value: 361.32 +/- 72.28
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.22 +/- 0.14
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Faucet Open
          type: metaworld-faucet-open
        metrics:
          - type: total_reward
            value: 637.86 +/- 134.50
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.85 +/- 0.29
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Hammer
          type: metaworld-hammer
        metrics:
          - type: total_reward
            value: 691.72 +/- 25.25
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.04
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Hand Insert
          type: metaworld-hand-insert
        metrics:
          - type: total_reward
            value: 719.57 +/- 99.26
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.97 +/- 0.13
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Handle Press Side
          type: metaworld-handle-press-side
        metrics:
          - type: total_reward
            value: 84.25 +/- 113.34
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.03 +/- 0.14
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Handle Press
          type: metaworld-handle-press
        metrics:
          - type: total_reward
            value: 731.94 +/- 261.90
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.84 +/- 0.34
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Handle Pull Side
          type: metaworld-handle-pull-side
        metrics:
          - type: total_reward
            value: 233.11 +/- 199.49
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.60 +/- 0.52
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Handle Pull
          type: metaworld-handle-pull
        metrics:
          - type: total_reward
            value: 501.29 +/- 209.45
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.74 +/- 0.32
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Lever Pull
          type: metaworld-lever-pull
        metrics:
          - type: total_reward
            value: 250.18 +/- 228.59
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.34 +/- 0.41
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Peg Insert Side
          type: metaworld-peg-insert-side
        metrics:
          - type: total_reward
            value: 288.02 +/- 157.87
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.91 +/- 0.50
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Peg Unplug Side
          type: metaworld-peg-unplug-side
        metrics:
          - type: total_reward
            value: 68.48 +/- 125.34
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.14 +/- 0.28
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Pick Out Of Hole
          type: metaworld-pick-out-of-hole
        metrics:
          - type: total_reward
            value: 2.08 +/- 0.05
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.00 +/- 0.00
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Pick Place Wall
          type: metaworld-pick-place-wall
        metrics:
          - type: total_reward
            value: 6.87 +/- 44.99
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.02 +/- 0.10
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Pick Place
          type: metaworld-pick-place
        metrics:
          - type: total_reward
            value: 264.18 +/- 195.69
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.63 +/- 0.47
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Plate Slide Back Side
          type: metaworld-plate-slide-back-side
        metrics:
          - type: total_reward
            value: 697.54 +/- 137.79
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.95 +/- 0.20
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Plate Slide Back
          type: metaworld-plate-slide-back
        metrics:
          - type: total_reward
            value: 196.80 +/- 1.73
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.24 +/- 0.00
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Plate Slide Side
          type: metaworld-plate-slide-side
        metrics:
          - type: total_reward
            value: 122.61 +/- 24.52
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.16 +/- 0.04
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Plate Slide
          type: metaworld-plate-slide
        metrics:
          - type: total_reward
            value: 497.42 +/- 168.74
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.93 +/- 0.37
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Push Back
          type: metaworld-push-back
        metrics:
          - type: total_reward
            value: 91.41 +/- 115.05
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.08 +/- 1.37
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Push Wall
          type: metaworld-push-wall
        metrics:
          - type: total_reward
            value: 116.49 +/- 208.05
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.15 +/- 0.28
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Push
          type: metaworld-push
        metrics:
          - type: total_reward
            value: 604.25 +/- 261.90
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.80 +/- 0.35
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Reach Wall
          type: metaworld-reach-wall
        metrics:
          - type: total_reward
            value: 634.57 +/- 231.40
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.81 +/- 0.38
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Reach
          type: metaworld-reach
        metrics:
          - type: total_reward
            value: 325.27 +/- 159.21
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.33 +/- 0.30
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Shelf Place
          type: metaworld-shelf-place
        metrics:
          - type: total_reward
            value: 124.60 +/- 112.83
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.52 +/- 0.47
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Soccer
          type: metaworld-soccer
        metrics:
          - type: total_reward
            value: 364.50 +/- 175.45
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.97 +/- 0.47
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Stick Pull
          type: metaworld-stick-pull
        metrics:
          - type: total_reward
            value: 398.64 +/- 205.60
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.76 +/- 0.39
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Stick Push
          type: metaworld-stick-push
        metrics:
          - type: total_reward
            value: 158.29 +/- 264.59
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.25 +/- 0.42
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Sweep Into
          type: metaworld-sweep-into
        metrics:
          - type: total_reward
            value: 775.30 +/- 119.00
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.97 +/- 0.15
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Sweep
          type: metaworld-sweep
        metrics:
          - type: total_reward
            value: 15.64 +/- 9.29
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.01 +/- 0.02
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Window Close
          type: metaworld-window-close
        metrics:
          - type: total_reward
            value: 423.33 +/- 203.92
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.69 +/- 0.38
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Window Open
          type: metaworld-window-open
        metrics:
          - type: total_reward
            value: 593.10 +/- 54.83
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.00 +/- 0.10
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Ant
          type: mujoco-ant
        metrics:
          - type: total_reward
            value: 5268.02 +/- 1495.39
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.90 +/- 0.25
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Inverted Double Pendulum
          type: mujoco-doublependulum
        metrics:
          - type: total_reward
            value: 4750.14 +/- 931.20
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.51 +/- 0.10
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Half Cheetah
          type: mujoco-halfcheetah
        metrics:
          - type: total_reward
            value: 6659.69 +/- 409.71
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.90 +/- 0.05
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Hopper
          type: mujoco-hopper
        metrics:
          - type: total_reward
            value: 1835.93 +/- 532.21
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.99 +/- 0.29
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Humanoid
          type: mujoco-humanoid
        metrics:
          - type: total_reward
            value: 697.44 +/- 108.06
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.09 +/- 0.02
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Inverted Pendulum
          type: mujoco-pendulum
        metrics:
          - type: total_reward
            value: 116.34 +/- 20.19
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.23 +/- 0.04
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Pusher
          type: mujoco-pusher
        metrics:
          - type: total_reward
            value: '-26.33 +/- 6.32'
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.99 +/- 0.05
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Reacher
          type: mujoco-reacher
        metrics:
          - type: total_reward
            value: '-6.06 +/- 2.64'
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.99 +/- 0.07
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Humanoid Standup
          type: mujoco-standup
        metrics:
          - type: total_reward
            value: 118125.15 +/- 24880.28
            name: Total reward
          - type: expert_normalized_total_reward
            value: 0.35 +/- 0.10
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Swimmer
          type: mujoco-swimmer
        metrics:
          - type: total_reward
            value: 93.26 +/- 3.78
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.01 +/- 0.04
            name: Expert normalized total reward
      - task:
          type: reinforcement-learning
          name: Reinforcement Learning
        dataset:
          name: Walker 2d
          type: mujoco-walker
        metrics:
          - type: total_reward
            value: 4662.43 +/- 762.67
            name: Total reward
          - type: expert_normalized_total_reward
            value: 1.01 +/- 0.16
            name: Expert normalized total reward

Model Card for JAT

This is a multi-modal and multi-task model.

Model Details

Model Description

  • Developed by: The JAT Team
  • License: Apache 2.0

Model Sources

Training

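The model was trained on the following tasks, listed by benchmark suite in this order: Atari (Alien through Zaxxon), BabyAI (Action Obj Door through Unlock), Meta-World (Assembly through Window Open), and MuJoCo (Ant through Walker 2d):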

  • Alien
  • Amidar
  • Assault
  • Asterix
  • Asteroids
  • Atlantis
  • Bank Heist
  • Battle Zone
  • Beam Rider
  • Berzerk
  • Bowling
  • Boxing
  • Breakout
  • Centipede
  • Chopper Command
  • Crazy Climber
  • Defender
  • Demon Attack
  • Double Dunk
  • Enduro
  • Fishing Derby
  • Freeway
  • Frostbite
  • Gopher
  • Gravitar
  • H.E.R.O.
  • Ice Hockey
  • James Bond
  • Kangaroo
  • Krull
  • Kung-Fu Master
  • Montezuma's Revenge
  • Ms. Pacman
  • Name This Game
  • Phoenix
  • Pitfall
  • Pong
  • Private Eye
  • Q*Bert
  • River Raid
  • Road Runner
  • Robotank
  • Seaquest
  • Skiing
  • Solaris
  • Space Invaders
  • Star Gunner
  • Surround
  • Tennis
  • Time Pilot
  • Tutankham
  • Up and Down
  • Venture
  • Video Pinball
  • Wizard of Wor
  • Yars Revenge
  • Zaxxon
  • Action Obj Door
  • Blocked Unlock Pickup
  • Boss Level No Unlock
  • Boss Level
  • Find Obj S5
  • Go To Door
  • Go To Imp Unlock
  • Go To Local
  • Go To Obj Door
  • Go To Obj
  • Go To Red Ball Grey
  • Go To Red Ball No Dists
  • Go To Red Ball
  • Go To Red Blue Ball
  • Go To Seq
  • Go To
  • Key Corridor
  • Mini Boss Level
  • Move Two Across S8N9
  • One Room S8
  • Open Door
  • Open Doors Order N4
  • Open Red Door
  • Open Two Doors
  • Open
  • Pickup Above
  • Pickup Dist
  • Pickup Loc
  • Pickup
  • Put Next Local
  • Put Next S7N4
  • Synth Loc
  • Synth Seq
  • Synth
  • Unblock Pickup
  • Unlock Local
  • Unlock Pickup
  • Unlock To Unlock
  • Unlock
  • Assembly
  • Basketball
  • Bin Picking
  • Box Close
  • Button Press Topdown Wall
  • Button Press Topdown
  • Button Press Wall
  • Button Press
  • Coffee Button
  • Coffee Pull
  • Coffee Push
  • Dial Turn
  • Disassemble
  • Door Close
  • Door Lock
  • Door Open
  • Door Unlock
  • Drawer Close
  • Drawer Open
  • Faucet Close
  • Faucet Open
  • Hammer
  • Hand Insert
  • Handle Press Side
  • Handle Press
  • Handle Pull Side
  • Handle Pull
  • Lever Pull
  • Peg Insert Side
  • Peg Unplug Side
  • Pick Out Of Hole
  • Pick Place Wall
  • Pick Place
  • Plate Slide Back Side
  • Plate Slide Back
  • Plate Slide Side
  • Plate Slide
  • Push Back
  • Push Wall
  • Push
  • Reach Wall
  • Reach
  • Shelf Place
  • Soccer
  • Stick Pull
  • Stick Push
  • Sweep Into
  • Sweep
  • Window Close
  • Window Open
  • Ant
  • Inverted Double Pendulum
  • Half Cheetah
  • Hopper
  • Humanoid
  • Inverted Pendulum
  • Pusher
  • Reacher
  • Humanoid Standup
  • Swimmer
  • Walker 2d

How to Get Started with the Model

Use the code below to get started with the model.

```python
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("jat-project/jat")
```