sabretoothedhugs commited on
Commit
9b19c29
·
1 Parent(s): 5c619eb
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +161 -0
  2. .pre-commit-config.yaml +52 -0
  3. .readthedocs.yaml +23 -0
  4. CONTRIBUTING.md +3 -0
  5. LICENSE +21 -0
  6. MANIFEST.in +1 -0
  7. README.md +1 -0
  8. app.py +440 -0
  9. examples/__init__.py +0 -0
  10. examples/atari/README.md +137 -0
  11. examples/atari/__init__.py +0 -0
  12. examples/atari/atari_c51.py +218 -0
  13. examples/atari/atari_dqn.py +262 -0
  14. examples/atari/atari_dqn_hl.py +111 -0
  15. examples/atari/atari_fqf.py +231 -0
  16. examples/atari/atari_fqf_rainbow.py +288 -0
  17. examples/atari/atari_iqn.py +229 -0
  18. examples/atari/atari_iqn_hl.py +103 -0
  19. examples/atari/atari_network.py +308 -0
  20. examples/atari/atari_ppo.py +284 -0
  21. examples/atari/atari_ppo_hl.py +122 -0
  22. examples/atari/atari_qrdqn.py +222 -0
  23. examples/atari/atari_rainbow.py +258 -0
  24. examples/atari/atari_sac.py +271 -0
  25. examples/atari/atari_sac_hl.py +110 -0
  26. examples/atari/atari_wrapper.py +469 -0
  27. examples/atari/benchmark/BreakoutNoFrameskip-v4/result.json +1 -0
  28. examples/atari/benchmark/EnduroNoFrameskip-v4/result.json +1 -0
  29. examples/atari/benchmark/MsPacmanNoFrameskip-v4/result.json +1 -0
  30. examples/atari/benchmark/PongNoFrameskip-v4/result.json +1 -0
  31. examples/atari/benchmark/QbertNoFrameskip-v4/result.json +1 -0
  32. examples/atari/benchmark/SeaquestNoFrameskip-v4/result.json +1 -0
  33. examples/atari/benchmark/SpaceInvadersNoFrameskip-v4/result.json +1 -0
  34. examples/atari/results/c51/Breakout_rew.png +0 -0
  35. examples/atari/results/c51/Enduro_rew.png +0 -0
  36. examples/atari/results/c51/MsPacman_rew.png +0 -0
  37. examples/atari/results/c51/Pong_rew.png +0 -0
  38. examples/atari/results/c51/Qbert_rew.png +0 -0
  39. examples/atari/results/c51/Seaquest_rew.png +0 -0
  40. examples/atari/results/c51/SpaceInvader_rew.png +0 -0
  41. examples/atari/results/discrete_sac/Breakout_rew.png +0 -0
  42. examples/atari/results/discrete_sac/Enduro_rew.png +0 -0
  43. examples/atari/results/discrete_sac/MsPacman_rew.png +0 -0
  44. examples/atari/results/discrete_sac/Pong_rew.png +0 -0
  45. examples/atari/results/discrete_sac/Qbert_rew.png +0 -0
  46. examples/atari/results/discrete_sac/Seaquest_rew.png +0 -0
  47. examples/atari/results/discrete_sac/SpaceInvaders_rew.png +0 -0
  48. examples/atari/results/dqn/Breakout_rew.png +0 -0
  49. examples/atari/results/dqn/Enduro_rew.png +0 -0
  50. examples/atari/results/dqn/MsPacman_rew.png +0 -0
.gitignore ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # .idea folder
10
+ .idea/
11
+
12
+ # Distribution / packaging
13
+ .Python
14
+ build/
15
+ develop-eggs/
16
+ dist/
17
+ downloads/
18
+ eggs/
19
+ .eggs/
20
+ lib/
21
+ lib64/
22
+ parts/
23
+ sdist/
24
+ var/
25
+ wheels/
26
+ pip-wheel-metadata/
27
+ share/python-wheels/
28
+ *.egg-info/
29
+ .installed.cfg
30
+ *.egg
31
+ MANIFEST
32
+
33
+ # PyInstaller
34
+ # Usually these files are written by a python script from a template
35
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
36
+ *.manifest
37
+ *.spec
38
+
39
+ # Installer logs
40
+ pip-log.txt
41
+ pip-delete-this-directory.txt
42
+
43
+ # Unit test / coverage reports
44
+ htmlcov/
45
+ .tox/
46
+ .nox/
47
+ .coverage
48
+ .coverage.*
49
+ .cache
50
+ nosetests.xml
51
+ coverage.xml
52
+ *.cover
53
+ *.py,cover
54
+ .hypothesis/
55
+ .pytest_cache/
56
+ cover/
57
+
58
+ # Translations
59
+ *.mo
60
+ *.pot
61
+
62
+ # Django stuff:
63
+ *.log
64
+ local_settings.py
65
+ db.sqlite3
66
+ db.sqlite3-journal
67
+
68
+ # Flask stuff:
69
+ instance/
70
+ .webassets-cache
71
+
72
+ # Scrapy stuff:
73
+ .scrapy
74
+
75
+ # Sphinx documentation
76
+ docs/_build/
77
+
78
+ # PyBuilder
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+
84
+ # IPython
85
+ profile_default/
86
+ ipython_config.py
87
+
88
+ # pyenv
89
+ # For a library or package, you might want to ignore these files since the code is
90
+ # intended to run in multiple environments; otherwise, check them in:
91
+ # .python-version
92
+
93
+ # pipenv
94
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
96
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
97
+ # install all needed dependencies.
98
+ #Pipfile.lock
99
+
100
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
101
+ __pypackages__/
102
+
103
+ # Celery stuff
104
+ celerybeat-schedule
105
+ celerybeat.pid
106
+
107
+ # SageMath parsed files
108
+ *.sage.py
109
+
110
+ # Environments
111
+ .env
112
+ .venv
113
+ venv/
114
+ /ENV/
115
+ env.bak/
116
+ venv.bak/
117
+
118
+ # Spyder project settings
119
+ .spyderproject
120
+ .spyproject
121
+
122
+ # Rope project settings
123
+ .ropeproject
124
+
125
+ # mkdocs documentation
126
+ /site
127
+
128
+ # mypy
129
+ .mypy_cache/
130
+ .dmypy.json
131
+ dmypy.json
132
+
133
+ # Pyre type checker
134
+ .pyre/
135
+
136
+ # pytype static type analyzer
137
+ .pytype/
138
+
139
+ # customize
140
+ log/
141
+ MUJOCO_LOG.TXT
142
+ *.pth
143
+ .vscode/
144
+ .DS_Store
145
+ *.zip
146
+ *.pstats
147
+ *.swp
148
+ *.pkl
149
+ *.hdf5
150
+ wandb/
151
+ videos/
152
+
153
+ # might be needed for IDE plugins that can't read ruff config
154
+ .flake8
155
+
156
+ docs/notebooks/_build/
157
+ docs/conf.py
158
+
159
+ # temporary scripts (for ad-hoc testing), temp folder
160
+ /temp
161
+ /temp*.py
.pre-commit-config.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ default_install_hook_types: [commit-msg, pre-commit]
2
+ default_stages: [commit, manual]
3
+ fail_fast: false
4
+ repos:
5
+ - repo: https://github.com/pre-commit/pre-commit-hooks
6
+ rev: v4.4.0
7
+ hooks:
8
+ - id: check-added-large-files
9
+ - repo: local
10
+ hooks:
11
+ - id: ruff
12
+ name: ruff
13
+ entry: poetry run ruff
14
+ require_serial: true
15
+ language: system
16
+ types: [python]
17
+ - id: ruff-nb
18
+ name: ruff-nb
19
+ entry: poetry run nbqa ruff .
20
+ require_serial: true
21
+ language: system
22
+ pass_filenames: false
23
+ types: [python]
24
+ - id: black
25
+ name: black
26
+ entry: poetry run black
27
+ require_serial: true
28
+ language: system
29
+ types: [python]
30
+ - id: poetry-check
31
+ name: poetry check
32
+ entry: poetry check
33
+ language: system
34
+ files: pyproject.toml
35
+ pass_filenames: false
36
+ - id: poetry-lock-check
37
+ name: poetry lock check
38
+ entry: poetry check
39
+ args: [--lock]
40
+ language: system
41
+ pass_filenames: false
42
+ - id: mypy
43
+ name: mypy
44
+ entry: poetry run mypy tianshou examples test
45
+ # filenames should not be passed as they would collide with the config in pyproject.toml
46
+ pass_filenames: false
47
+ files: '^tianshou(/[^/]*)*/[^/]*\.py$'
48
+ language: system
49
+ - id: mypy-nb
50
+ name: mypy-nb
51
+ entry: poetry run nbqa mypy
52
+ language: system
.readthedocs.yaml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # .readthedocs.yaml
2
+ # Read the Docs configuration file
3
+ # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4
+
5
+ # Required
6
+ version: 2
7
+
8
+ # Set the version of Python and other tools you might need
9
+ build:
10
+ os: ubuntu-22.04
11
+ tools:
12
+ python: "3.11"
13
+ commands:
14
+ - mkdir -p $READTHEDOCS_OUTPUT/html
15
+ - curl -sSL https://install.python-poetry.org | python -
16
+ # - ~/.local/bin/poetry config virtualenvs.create false
17
+ - ~/.local/bin/poetry install --with dev
18
+ ## Same as poe tasks, but unfortunately poe doesn't work with poetry not creating virtualenvs
19
+ - ~/.local/bin/poetry run python docs/autogen_rst.py
20
+ - ~/.local/bin/poetry run which jupyter-book
21
+ - ~/.local/bin/poetry run python docs/create_toc.py
22
+ - ~/.local/bin/poetry run jupyter-book config sphinx docs/
23
+ - ~/.local/bin/poetry run sphinx-build -W -b html docs $READTHEDOCS_OUTPUT/html
CONTRIBUTING.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Contributing to Tianshou
2
+
3
+ Please refer to [tianshou.readthedocs.io/en/latest/contributing.html](https://tianshou.readthedocs.io/en/latest/contributing.html).
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Tianshou contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
MANIFEST.in ADDED
@@ -0,0 +1 @@
 
 
1
+ include LICENSE
README.md CHANGED
@@ -8,6 +8,7 @@ sdk_version: 4.37.2
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
+ python_version: 3.11
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,440 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import datetime
3
+ import os
4
+ import pprint
5
+ import sys
6
+
7
+ import numpy as np
8
+ import torch
9
+ from examples.atari.atari_network import C51
10
+ from examples.atari.atari_wrapper import wrap_deepmind
11
+
12
+ from tianshou.data import Collector
13
+ from examples.atari.tianshou.policy import C51Policy
14
+
15
+ import gymnasium as gym
16
+
17
+ from examples.atari.tianshou.env.venvs import DummyVectorEnv
18
+
19
+
20
+
21
+ from examples.atari.tianshou.utils.net.discrete import FractionProposalNetwork, FullQuantileFunction, FullQuantileFunctionRainbow
22
+ from examples.atari.atari_network import DQN
23
+ from examples.atari.tianshou.policy import FQFPolicy,FQF_RainbowPolicy
24
+
25
+
26
def seed(self, seed):
    """Seed NumPy's global random number generator.

    Args:
        self: accepted for signature compatibility; never read.
        seed: value forwarded unchanged to ``np.random.seed``.
    """
    np.random.seed(seed)
28
+
29
+ # Define configuration parameters
30
# Default settings for the C51 agent.  display_choice() sets "task" and
# "resume_path" before handing the dict to test_c51(), which reads only a
# subset of the keys.
# NOTE(review): the remaining keys look like training-script arguments kept
# for parity -- confirm before pruning.
config_c51 = {
    # --- environment / reproducibility ---
    "task": "PongNoFrameskip-v4",
    "seed": 3128,
    "scale_obs": 0,
    # --- exploration ---
    "eps_test": 0.005,
    "eps_train": 1.0,
    "eps_train_final": 0.05,
    # --- learning ---
    "buffer_size": 100_000,
    "lr": 1e-4,
    "gamma": 0.99,
    "num_atoms": 51,
    "v_min": -10.0,
    "v_max": 10.0,
    "n_step": 3,
    "target_update_freq": 500,
    # --- schedule ---
    "epoch": 100,
    "step_per_epoch": 100_000,
    "step_per_collect": 10,
    "update_per_step": 0.1,
    "batch_size": 32,
    "training_num": 1,
    "test_num": 1,
    # --- runtime / logging ---
    "logdir": "log",
    "render": 0.0,
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "frames_stack": 4,
    "resume_path": "examples/atari/c51_pong.pth",
    "resume_id": "",
    "logger": "tensorboard",
    "wandb_project": "atari.benchmark",
    "watch": True,
    "save_buffer_name": None,
}
63
+
64
+
65
# Default settings for the FQF agent.  display_choice() sets "task" and
# "resume_path" before handing the dict to test_FQF().
# NOTE(review): the default task is SpaceInvaders while the default checkpoint
# is "fqf_pong.pth" -- confirm which pairing is intended.
config_fqf = {
    # --- environment / reproducibility ---
    "task": "SpaceInvadersNoFrameskip-v4",
    "seed": 3128,
    "scale_obs": 0,
    # --- exploration ---
    "eps_test": 0.005,
    "eps_train": 1.0,
    "eps_train_final": 0.05,
    # --- learning ---
    "buffer_size": 100_000,
    "lr": 0.00005,
    "fraction_lr": 2.5e-9,
    "gamma": 0.99,
    "num_fractions": 32,
    "num_cosines": 64,
    "ent_coef": 10.0,
    "hidden_sizes": [512],
    "n_step": 3,
    "target_update_freq": 500,
    # --- schedule ---
    "epoch": 100,
    "step_per_epoch": 100_000,
    "step_per_collect": 10,
    "update_per_step": 0.1,
    "batch_size": 32,
    "training_num": 1,
    "test_num": 1,
    # --- runtime / logging ---
    "logdir": "log",
    "render": 0.0,
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "frames_stack": 4,
    "resume_path": "fqf_pong.pth",
    "resume_id": None,
    "logger": "tensorboard",
    "wandb_project": "atari.benchmark",
    "watch": True,
    "save_buffer_name": None,
}
100
+
101
+
102
# Default settings for the FQF-Rainbow agent.  display_choice() sets "task"
# and "resume_path" before handing the dict to test_fqf_rainbow().
config_fqf_r = {
    # --- environment / reproducibility ---
    "task": "PongNoFrameskip-v4",
    "algo_name": "RainbowFQF",
    "seed": 3128,
    "scale_obs": 0,
    # --- exploration ---
    "eps_test": 0.005,
    "eps_train": 1.0,
    "eps_train_final": 0.05,
    # --- learning ---
    "buffer_size": 100_000,
    "lr": 0.00005,
    "fraction_lr": 2.5e-9,
    "gamma": 0.99,
    "num_fractions": 32,
    "num_cosines": 64,
    "ent_coef": 10.0,
    "hidden_sizes": [512],
    "n_step": 3,
    "target_update_freq": 500,
    # --- schedule ---
    "epoch": 100,
    "step_per_epoch": 100_000,
    "step_per_collect": 10,
    "update_per_step": 0.1,
    "batch_size": 32,
    "training_num": 1,
    "test_num": 1,
    "logdir": "log",
    # --- Rainbow switches: a False "no_*" flag leaves the component enabled ---
    "no_dueling": False,
    "no_noisy": False,
    "no_priority": False,
    "noisy_std": 0.1,
    "alpha": 0.5,
    "beta": 0.4,
    "beta_final": 1.0,
    "beta_anneal_step": 5_000_000,
    "no_weight_norm": False,
    # --- runtime / logging ---
    "render": 0.0,
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "frames_stack": 4,
    "resume_path": None,
    "resume_id": None,
    "logger": "tensorboard",
    "wandb_project": "atari.benchmark",
    "watch": False,
    "save_buffer_name": None,
    "per": False,
}
148
+
149
+
150
def test_c51(config: dict):
    """Run a C51 agent on an Atari task and record the episode(s) to video.

    Args:
        config: settings dict.  Keys read here: "task", "seed", "num_atoms",
            "device", "lr", "gamma", "v_min", "v_max", "n_step",
            "target_update_freq", "resume_path" and "test_num".

    Returns:
        The object returned by ``Collector.collect`` for the recorded
        episodes; display_choice() reads ``returns_stat.mean`` from it.
        (The previous ``-> None`` annotation contradicted this return.)
    """
    env_deep = wrap_deepmind(gym.make(config["task"], render_mode="rgb_array"))
    # The factory wraps the already-built env so that RecordVideo writes its
    # output below video-app/.
    rec_env = DummyVectorEnv(
        [lambda: gym.wrappers.RecordVideo(env_deep, video_folder="video-app/")]
    )
    try:
        state_shape = env_deep.observation_space.shape or env_deep.observation_space.n
        action_shape = env_deep.action_space.shape or env_deep.action_space.n
        # Expected to be N_FRAMES x H x W.
        print("Observations shape:", state_shape)
        print("Actions shape:", action_shape)

        # Seed every source of randomness used during evaluation.
        np.random.seed(config["seed"])
        torch.manual_seed(config["seed"])
        rec_env.seed(config["seed"])

        net = C51(*state_shape, action_shape, config["num_atoms"], config["device"])
        optim = torch.optim.Adam(net.parameters(), lr=config["lr"])
        policy = C51Policy(
            model=net,
            optim=optim,
            discount_factor=config["gamma"],
            action_space=env_deep.action_space,
            num_atoms=config["num_atoms"],
            v_min=config["v_min"],
            v_max=config["v_max"],
            estimation_step=config["n_step"],
            target_update_freq=config["target_update_freq"],
        ).to(config["device"])

        # Load saved weights when a checkpoint path is configured.
        if config["resume_path"]:
            policy.load_state_dict(
                torch.load(config["resume_path"], map_location=config["device"])
            )
            print("Loaded agent from:", config["resume_path"])

        collector = Collector(policy, rec_env, exploration_noise=True)
        result = collector.collect(n_episode=config["test_num"])
    finally:
        # Close on every path so a failed run does not leave the env open
        # between Gradio requests.
        rec_env.close()

    result.pprint_asdict()
    return result
209
+
210
def test_FQF(config: dict):
    """Run an FQF agent on an Atari task and record the episode(s) to video.

    Args:
        config: settings dict.  Keys read here: "task", "seed", "device",
            "hidden_sizes", "num_cosines", "lr", "num_fractions",
            "fraction_lr", "gamma", "ent_coef", "n_step",
            "target_update_freq", "resume_path" and "test_num".

    Returns:
        The object returned by ``Collector.collect`` for the recorded
        episodes; display_choice() reads ``returns_stat.mean`` from it.
        (The previous ``-> None`` annotation contradicted this return.)
    """
    env_deep = wrap_deepmind(gym.make(config["task"], render_mode="rgb_array"))
    # The factory wraps the already-built env so that RecordVideo writes its
    # output below video-app/.
    rec_env = DummyVectorEnv(
        [lambda: gym.wrappers.RecordVideo(env_deep, video_folder="video-app/")]
    )
    try:
        state_shape = env_deep.observation_space.shape or env_deep.observation_space.n
        action_shape = env_deep.action_space.shape or env_deep.action_space.n
        # Expected to be N_FRAMES x H x W.
        print("Observations shape:", state_shape)
        print("Actions shape:", action_shape)

        # Seed every source of randomness used during evaluation.
        np.random.seed(config["seed"])
        torch.manual_seed(config["seed"])
        rec_env.seed(config["seed"])

        # Feature extractor shared by the quantile-function head.
        feature_net = DQN(*state_shape, action_shape, config["device"], features_only=True)
        net = FullQuantileFunction(
            feature_net,
            action_shape,
            config["hidden_sizes"],
            config["num_cosines"],
        ).to(config["device"])
        optim = torch.optim.Adam(net.parameters(), lr=config["lr"])

        # The fraction proposal network gets its own RMSprop optimizer.
        fraction_net = FractionProposalNetwork(config["num_fractions"], net.input_dim)
        fraction_optim = torch.optim.RMSprop(
            fraction_net.parameters(), lr=config["fraction_lr"]
        )

        policy: FQFPolicy = FQFPolicy(
            model=net,
            optim=optim,
            fraction_model=fraction_net,
            fraction_optim=fraction_optim,
            action_space=env_deep.action_space,
            discount_factor=config["gamma"],
            num_fractions=config["num_fractions"],
            ent_coef=config["ent_coef"],
            estimation_step=config["n_step"],
            target_update_freq=config["target_update_freq"],
        ).to(config["device"])

        # Load saved weights when a checkpoint path is configured.
        if config["resume_path"]:
            policy.load_state_dict(
                torch.load(config["resume_path"], map_location=config["device"])
            )
            print("Loaded agent from:", config["resume_path"])

        collector = Collector(policy, rec_env, exploration_noise=True)
        result = collector.collect(n_episode=config["test_num"])
    finally:
        # Close on every path so a failed run does not leave the env open
        # between Gradio requests.
        rec_env.close()

    result.pprint_asdict()
    return result
288
+
289
+
290
+
291
def test_fqf_rainbow(config: dict):
    """Run an FQF-Rainbow agent on an Atari task and record the episode(s).

    Unlike the earlier version, this does not write "state_shape" and
    "action_shape" back into ``config``; the caller's dict is left untouched.

    Args:
        config: settings dict.  Keys read here: "task", "seed", "device",
            "hidden_sizes", "num_cosines", "noisy_std", "no_noisy",
            "no_dueling", "lr", "num_fractions", "fraction_lr", "gamma",
            "ent_coef", "n_step", "target_update_freq", "resume_path" and
            "test_num".

    Returns:
        The object returned by ``Collector.collect`` for the recorded
        episodes; display_choice() reads ``returns_stat.mean`` from it.
        (The previous ``-> None`` annotation contradicted this return.)
    """
    env_deep = wrap_deepmind(gym.make(config["task"], render_mode="rgb_array"))
    # The factory wraps the already-built env so that RecordVideo writes its
    # output below video-app/.
    rec_env = DummyVectorEnv(
        [lambda: gym.wrappers.RecordVideo(env_deep, video_folder="video-app/")]
    )
    try:
        # Kept as locals so the shared module-level config is not mutated.
        state_shape = env_deep.observation_space.shape or env_deep.observation_space.n
        action_shape = env_deep.action_space.shape or env_deep.action_space.n

        # Seed every source of randomness used during evaluation.
        np.random.seed(config["seed"])
        torch.manual_seed(config["seed"])
        rec_env.seed(config["seed"])

        feature_net = DQN(*state_shape, action_shape, config["device"], features_only=True)
        use_noisy = not config["no_noisy"]
        net = FullQuantileFunctionRainbow(
            preprocess_net=feature_net,
            action_shape=action_shape,
            hidden_sizes=config["hidden_sizes"],
            num_cosines=config["num_cosines"],
            preprocess_net_output_dim=feature_net.output_dim,
            device=config["device"],
            noisy_std=config["noisy_std"],
            is_noisy=use_noisy,
            is_dueling=not config["no_dueling"],
        ).to(config["device"])
        optim = torch.optim.Adam(net.parameters(), lr=config["lr"])

        # The fraction proposal network gets its own RMSprop optimizer.
        fraction_net = FractionProposalNetwork(config["num_fractions"], net.input_dim)
        fraction_optim = torch.optim.RMSprop(
            fraction_net.parameters(), lr=config["fraction_lr"]
        )

        policy: FQF_RainbowPolicy = FQF_RainbowPolicy(
            model=net,
            optim=optim,
            fraction_model=fraction_net,
            fraction_optim=fraction_optim,
            action_space=env_deep.action_space,
            discount_factor=config["gamma"],
            num_fractions=config["num_fractions"],
            ent_coef=config["ent_coef"],
            estimation_step=config["n_step"],
            target_update_freq=config["target_update_freq"],
            is_noisy=use_noisy,
        ).to(config["device"])

        # Load saved weights when a checkpoint path is configured.
        if config["resume_path"]:
            policy.load_state_dict(
                torch.load(config["resume_path"], map_location=config["device"])
            )
            print("Loaded agent from:", config["resume_path"])

        test_collector = Collector(policy, rec_env, exploration_noise=True)
        result = test_collector.collect(n_episode=config["test_num"])
    finally:
        # Close on every path so a failed run does not leave the env open
        # between Gradio requests.
        rec_env.close()

    result.pprint_asdict()
    return result
373
+
374
+
375
+ # Define the function to display choices and mean scores
376
def display_choice(algo, game, slider):
    """Gradio callback: evaluate the selected agent on the selected game.

    Changes from the earlier version:
      * every path returns two values, matching the interface's two outputs
        (the "Pong" placeholders used to return a bare int);
      * a missing or unknown selection raises a readable error instead of
        failing on an unbound local;
      * the "Seed" slider value is actually used as the evaluation seed;
      * the module-level config dicts are copied rather than mutated.

    Args:
        algo: "C51", "FQF" or "FQF-Rainbow".
        game: "Freeway" or "Pong".
        slider: value of the "Seed" slider.  ``None`` keeps the seed stored
            in the algorithm's config.

    Returns:
        ``[score, video_path]``.  For "Freeway" the score is the mean return
        of the evaluation run and the path points at the recorded episode;
        for "Pong" a fixed placeholder score is returned with no video.

    Raises:
        gr.Error: if the algorithm/game combination is not supported.
    """
    # algo -> (evaluation function, base config, Freeway checkpoint, Pong placeholder)
    runners = {
        "C51": (test_c51, config_c51, "models/c51_freeway.pth", 19),
        "FQF": (test_FQF, config_fqf, "models/fqf_freeway.pth", 20),
        "FQF-Rainbow": (
            test_fqf_rainbow,
            config_fqf_r,
            "models/fqf-rainbow_freeway.pth",
            21,
        ),
    }
    if algo not in runners or game not in ("Freeway", "Pong"):
        raise gr.Error(f"Unsupported selection: algorithm={algo!r}, game={game!r}")

    run, base_config, checkpoint, pong_score = runners[algo]
    if game == "Pong":
        # No Pong evaluation is wired up yet; keep the placeholder score but
        # pair it with an empty video slot.
        return [pong_score, None]

    # Work on a copy so repeated requests do not accumulate state in the
    # shared defaults.
    config = {
        **base_config,
        "resume_path": checkpoint,
        "task": "FreewayNoFrameskip-v4",
    }
    if slider is not None:
        # Cast to int because the slider may deliver a float.
        config["seed"] = int(slider)

    result = run(config)
    return [result.returns_stat.mean, "video-app/rl-video-episode-0.mp4"]
417
+
418
+ # Define the choices for the radio buttons
419
algos = ["C51", "FQF", "FQF-Rainbow"]
# Only Freeway is offered; an earlier list also named Pong, Space Invaders
# and MsPacman.
games = ["Freeway"]


# Gradio front end: the three inputs are passed positionally to
# display_choice(algo, game, slider); its two return values fill the outputs.
demo = gr.Interface(
    fn=display_choice,
    inputs=[
        gr.Radio(algos, label="Algorithm"),
        gr.Radio(games, label="Game"),
        gr.Slider(maximum=100, label="Seed"),
    ],
    outputs=[
        gr.Textbox(label="Score"),
        gr.Video(autoplay=True, height=480, width=480, label="Replay"),
    ],
    title="Distributional RL Algorithms Benchmark",
    description="Select the DRL agent and the game of your choice",
    theme="soft",
)

# Start the app only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch(share=False)
examples/__init__.py ADDED
File without changes
examples/atari/README.md ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Atari Environment
2
+
3
+ ## EnvPool
4
+
5
+ We highly recommend using EnvPool to run the following experiments. To install it on a Linux machine, run:
6
+
7
+ ```bash
8
+ pip install envpool
9
+ ```
10
+
11
+ After that, `atari_wrapper` will automatically switch to EnvPool's Atari env. EnvPool's implementation is much faster (about 2\~3x faster in pure execution speed, 1.5x for the overall RL training pipeline) than the Python vectorized env implementation, and its behavior is consistent with that approach (the OpenAI wrapper), which is described below.
12
+
13
+ For more information, please refer to EnvPool's [GitHub](https://github.com/sail-sg/envpool/), [Docs](https://envpool.readthedocs.io/en/latest/api/atari.html), and [3rd-party report](https://iclr-blog-track.github.io/2022/03/25/ppo-implementation-details/#solving-pong-in-5-minutes-with-ppo--envpool).
14
+
15
+ ## ALE-py
16
+
17
+ The sample speed is \~3000 env steps per second (in fact \~12000 Atari frames per second, since we use frame_stack=4) under the normal mode (using a CNN policy and a collector, and also storing data into the buffer).
18
+
19
+ The env wrapper is a crucial thing. Without wrappers, the agent cannot perform well enough on Atari games. Many existing RL codebases use [OpenAI wrapper](https://github.com/openai/baselines/blob/master/baselines/common/atari_wrappers.py), but it is not the original DeepMind version ([related issue](https://github.com/openai/baselines/issues/240)). Dopamine has a different [wrapper](https://github.com/google/dopamine/blob/master/dopamine/discrete_domains/atari_lib.py) but unfortunately it cannot work very well in our codebase.
20
+
21
+ # DQN (single run)
22
+
23
+ One epoch here is equal to 100,000 env steps; 100 epochs correspond to 10M steps.
24
+
25
+ | task | best reward | reward curve | parameters | time cost |
26
+ | --------------------------- | ----------- | ------------------------------------- | ------------------------------------------------------------ | ------------------- |
27
+ | PongNoFrameskip-v4 | 20 | ![](results/dqn/Pong_rew.png) | `python3 atari_dqn.py --task "PongNoFrameskip-v4" --batch-size 64` | ~30 min (~15 epoch) |
28
+ | BreakoutNoFrameskip-v4 | 316 | ![](results/dqn/Breakout_rew.png) | `python3 atari_dqn.py --task "BreakoutNoFrameskip-v4" --test-num 100` | 3~4h (100 epoch) |
29
+ | EnduroNoFrameskip-v4 | 670 | ![](results/dqn/Enduro_rew.png) | `python3 atari_dqn.py --task "EnduroNoFrameskip-v4" --test-num 100` | 3~4h (100 epoch) |
30
+ | QbertNoFrameskip-v4 | 7307 | ![](results/dqn/Qbert_rew.png) | `python3 atari_dqn.py --task "QbertNoFrameskip-v4" --test-num 100` | 3~4h (100 epoch) |
31
+ | MsPacmanNoFrameskip-v4 | 2107 | ![](results/dqn/MsPacman_rew.png) | `python3 atari_dqn.py --task "MsPacmanNoFrameskip-v4" --test-num 100` | 3~4h (100 epoch) |
32
+ | SeaquestNoFrameskip-v4 | 2088 | ![](results/dqn/Seaquest_rew.png) | `python3 atari_dqn.py --task "SeaquestNoFrameskip-v4" --test-num 100` | 3~4h (100 epoch) |
33
+ | SpaceInvadersNoFrameskip-v4 | 812.2 | ![](results/dqn/SpaceInvader_rew.png) | `python3 atari_dqn.py --task "SpaceInvadersNoFrameskip-v4" --test-num 100` | 3~4h (100 epoch) |
34
+
35
+ Note: The `eps_train_final` and `eps_test` values in the original DQN paper are 0.1 and 0.01, but [some works](https://github.com/google/dopamine/tree/master/baselines) found that a smaller eps helps improve performance. Also, a larger batch size (say 64 instead of 32) helps convergence in fewer steps but slows down the training speed.
36
+
37
+ We haven't tuned these results to the best, so have fun playing with these hyperparameters!
38
+
39
+ # C51 (single run)
40
+
41
+ One epoch here is equal to 100,000 env steps; 100 epochs correspond to 10M steps.
42
+
43
+ | task | best reward | reward curve | parameters |
44
+ | --------------------------- | ----------- | ------------------------------------- | ------------------------------------------------------------ |
45
+ | PongNoFrameskip-v4 | 20 | ![](results/c51/Pong_rew.png) | `python3 atari_c51.py --task "PongNoFrameskip-v4" --batch-size 64` |
46
+ | BreakoutNoFrameskip-v4 | 536.6 | ![](results/c51/Breakout_rew.png) | `python3 atari_c51.py --task "BreakoutNoFrameskip-v4" --n-step 1` |
47
+ | EnduroNoFrameskip-v4 | 1032 | ![](results/c51/Enduro_rew.png) | `python3 atari_c51.py --task "EnduroNoFrameskip-v4"` |
48
+ | QbertNoFrameskip-v4 | 16245 | ![](results/c51/Qbert_rew.png) | `python3 atari_c51.py --task "QbertNoFrameskip-v4"` |
49
+ | MsPacmanNoFrameskip-v4 | 3133 | ![](results/c51/MsPacman_rew.png) | `python3 atari_c51.py --task "MsPacmanNoFrameskip-v4"` |
50
+ | SeaquestNoFrameskip-v4 | 6226 | ![](results/c51/Seaquest_rew.png) | `python3 atari_c51.py --task "SeaquestNoFrameskip-v4"` |
51
+ | SpaceInvadersNoFrameskip-v4 | 988.5 | ![](results/c51/SpaceInvader_rew.png) | `python3 atari_c51.py --task "SpaceInvadersNoFrameskip-v4"` |
52
+
53
+ Note: The selection of `n_step` is based on Figure 6 in the [Rainbow](https://arxiv.org/abs/1710.02298) paper.
54
+
55
+ # QRDQN (single run)
56
+
57
+ One epoch here is equal to 100,000 env steps; 100 epochs correspond to 10M steps.
58
+
59
+ | task | best reward | reward curve | parameters |
60
+ | --------------------------- | ----------- | ------------------------------------- | ------------------------------------------------------------ |
61
+ | PongNoFrameskip-v4 | 20 | ![](results/qrdqn/Pong_rew.png) | `python3 atari_qrdqn.py --task "PongNoFrameskip-v4" --batch-size 64` |
62
+ | BreakoutNoFrameskip-v4 | 409.2 | ![](results/qrdqn/Breakout_rew.png) | `python3 atari_qrdqn.py --task "BreakoutNoFrameskip-v4" --n-step 1` |
63
+ | EnduroNoFrameskip-v4 | 1055.9 | ![](results/qrdqn/Enduro_rew.png) | `python3 atari_qrdqn.py --task "EnduroNoFrameskip-v4"` |
64
+ | QbertNoFrameskip-v4 | 14990 | ![](results/qrdqn/Qbert_rew.png) | `python3 atari_qrdqn.py --task "QbertNoFrameskip-v4"` |
65
+ | MsPacmanNoFrameskip-v4 | 2886 | ![](results/qrdqn/MsPacman_rew.png) | `python3 atari_qrdqn.py --task "MsPacmanNoFrameskip-v4"` |
66
+ | SeaquestNoFrameskip-v4 | 5676 | ![](results/qrdqn/Seaquest_rew.png) | `python3 atari_qrdqn.py --task "SeaquestNoFrameskip-v4"` |
67
+ | SpaceInvadersNoFrameskip-v4 | 938 | ![](results/qrdqn/SpaceInvader_rew.png) | `python3 atari_qrdqn.py --task "SpaceInvadersNoFrameskip-v4"` |
68
+
69
+ # IQN (single run)
70
+
71
+ One epoch here is equal to 100,000 env steps, so 100 epochs correspond to 10M env steps.
72
+
73
+ | task | best reward | reward curve | parameters |
74
+ | --------------------------- | ----------- | ------------------------------------- | ------------------------------------------------------------ |
75
+ | PongNoFrameskip-v4 | 20.3 | ![](results/iqn/Pong_rew.png) | `python3 atari_iqn.py --task "PongNoFrameskip-v4" --batch-size 64` |
76
+ | BreakoutNoFrameskip-v4 | 496.7 | ![](results/iqn/Breakout_rew.png) | `python3 atari_iqn.py --task "BreakoutNoFrameskip-v4" --n-step 1` |
77
+ | EnduroNoFrameskip-v4 | 1545 | ![](results/iqn/Enduro_rew.png) | `python3 atari_iqn.py --task "EnduroNoFrameskip-v4"` |
78
+ | QbertNoFrameskip-v4 | 15342.5 | ![](results/iqn/Qbert_rew.png) | `python3 atari_iqn.py --task "QbertNoFrameskip-v4"` |
79
+ | MsPacmanNoFrameskip-v4 | 2915 | ![](results/iqn/MsPacman_rew.png) | `python3 atari_iqn.py --task "MsPacmanNoFrameskip-v4"` |
80
+ | SeaquestNoFrameskip-v4 | 4874 | ![](results/iqn/Seaquest_rew.png) | `python3 atari_iqn.py --task "SeaquestNoFrameskip-v4"` |
81
+ | SpaceInvadersNoFrameskip-v4 | 1498.5 | ![](results/iqn/SpaceInvaders_rew.png) | `python3 atari_iqn.py --task "SpaceInvadersNoFrameskip-v4"` |
82
+
83
+ # FQF (single run)
84
+
85
+ One epoch here is equal to 100,000 env steps, so 100 epochs correspond to 10M env steps.
86
+
87
+ | task | best reward | reward curve | parameters |
88
+ | --------------------------- | ----------- | ------------------------------------- | ------------------------------------------------------------ |
89
+ | PongNoFrameskip-v4 | 20.7 | ![](results/fqf/Pong_rew.png) | `python3 atari_fqf.py --task "PongNoFrameskip-v4" --batch-size 64` |
90
+ | BreakoutNoFrameskip-v4 | 517.3 | ![](results/fqf/Breakout_rew.png) | `python3 atari_fqf.py --task "BreakoutNoFrameskip-v4" --n-step 1` |
91
+ | EnduroNoFrameskip-v4 | 2240.5 | ![](results/fqf/Enduro_rew.png) | `python3 atari_fqf.py --task "EnduroNoFrameskip-v4"` |
92
+ | QbertNoFrameskip-v4 | 16172.5 | ![](results/fqf/Qbert_rew.png) | `python3 atari_fqf.py --task "QbertNoFrameskip-v4"` |
93
+ | MsPacmanNoFrameskip-v4 | 2429 | ![](results/fqf/MsPacman_rew.png) | `python3 atari_fqf.py --task "MsPacmanNoFrameskip-v4"` |
94
+ | SeaquestNoFrameskip-v4 | 10775 | ![](results/fqf/Seaquest_rew.png) | `python3 atari_fqf.py --task "SeaquestNoFrameskip-v4"` |
95
+ | SpaceInvadersNoFrameskip-v4 | 2482 | ![](results/fqf/SpaceInvaders_rew.png) | `python3 atari_fqf.py --task "SpaceInvadersNoFrameskip-v4"` |
96
+
97
+ # Rainbow (single run)
98
+
99
+ One epoch here is equal to 100,000 env steps, so 100 epochs correspond to 10M env steps.
100
+
101
+ | task | best reward | reward curve | parameters |
102
+ | --------------------------- | ----------- | ------------------------------------- | ------------------------------------------------------------ |
103
+ | PongNoFrameskip-v4 | 21 | ![](results/rainbow/Pong_rew.png) | `python3 atari_rainbow.py --task "PongNoFrameskip-v4" --batch-size 64` |
104
+ | BreakoutNoFrameskip-v4 | 684.6 | ![](results/rainbow/Breakout_rew.png) | `python3 atari_rainbow.py --task "BreakoutNoFrameskip-v4" --n-step 1` |
105
+ | EnduroNoFrameskip-v4 | 1625.9 | ![](results/rainbow/Enduro_rew.png) | `python3 atari_rainbow.py --task "EnduroNoFrameskip-v4"` |
106
+ | QbertNoFrameskip-v4 | 16192.5 | ![](results/rainbow/Qbert_rew.png) | `python3 atari_rainbow.py --task "QbertNoFrameskip-v4"` |
107
+ | MsPacmanNoFrameskip-v4 | 3101 | ![](results/rainbow/MsPacman_rew.png) | `python3 atari_rainbow.py --task "MsPacmanNoFrameskip-v4"` |
108
+ | SeaquestNoFrameskip-v4 | 2126 | ![](results/rainbow/Seaquest_rew.png) | `python3 atari_rainbow.py --task "SeaquestNoFrameskip-v4"` |
109
+ | SpaceInvadersNoFrameskip-v4 | 1794.5 | ![](results/rainbow/SpaceInvaders_rew.png) | `python3 atari_rainbow.py --task "SpaceInvadersNoFrameskip-v4"` |
110
+
111
+ # PPO (single run)
112
+
113
+ One epoch here is equal to 100,000 env steps, so 100 epochs correspond to 10M env steps.
114
+
115
+ | task | best reward | reward curve | parameters |
116
+ | --------------------------- | ----------- | ------------------------------------- | ------------------------------------------------------------ |
117
+ | PongNoFrameskip-v4 | 20.2 | ![](results/ppo/Pong_rew.png) | `python3 atari_ppo.py --task "PongNoFrameskip-v4"` |
118
+ | BreakoutNoFrameskip-v4 | 441.8 | ![](results/ppo/Breakout_rew.png) | `python3 atari_ppo.py --task "BreakoutNoFrameskip-v4"` |
119
+ | EnduroNoFrameskip-v4 | 1245.4 | ![](results/ppo/Enduro_rew.png) | `python3 atari_ppo.py --task "EnduroNoFrameskip-v4"` |
120
+ | QbertNoFrameskip-v4 | 17395 | ![](results/ppo/Qbert_rew.png) | `python3 atari_ppo.py --task "QbertNoFrameskip-v4"` |
121
+ | MsPacmanNoFrameskip-v4 | 2098 | ![](results/ppo/MsPacman_rew.png) | `python3 atari_ppo.py --task "MsPacmanNoFrameskip-v4"` |
122
+ | SeaquestNoFrameskip-v4 | 882 | ![](results/ppo/Seaquest_rew.png) | `python3 atari_ppo.py --task "SeaquestNoFrameskip-v4" --lr 1e-4` |
123
+ | SpaceInvadersNoFrameskip-v4 | 1340.5 | ![](results/ppo/SpaceInvaders_rew.png) | `python3 atari_ppo.py --task "SpaceInvadersNoFrameskip-v4"` |
124
+
125
+ # SAC (single run)
126
+
127
+ One epoch here is equal to 100,000 env steps, so 100 epochs correspond to 10M env steps.
128
+
129
+ | task | best reward | reward curve | parameters |
130
+ | --------------------------- | ----------- | ------------------------------------- | ------------------------------------------------------------ |
131
+ | PongNoFrameskip-v4 | 20.1 | ![](results/discrete_sac/Pong_rew.png) | `python3 atari_sac.py --task "PongNoFrameskip-v4"` |
132
+ | BreakoutNoFrameskip-v4 | 211.2 | ![](results/discrete_sac/Breakout_rew.png) | `python3 atari_sac.py --task "BreakoutNoFrameskip-v4" --n-step 1 --actor-lr 1e-4 --critic-lr 1e-4` |
133
+ | EnduroNoFrameskip-v4 | 1290.7 | ![](results/discrete_sac/Enduro_rew.png) | `python3 atari_sac.py --task "EnduroNoFrameskip-v4"` |
134
+ | QbertNoFrameskip-v4 | 13157.5 | ![](results/discrete_sac/Qbert_rew.png) | `python3 atari_sac.py --task "QbertNoFrameskip-v4"` |
135
+ | MsPacmanNoFrameskip-v4 | 3836 | ![](results/discrete_sac/MsPacman_rew.png) | `python3 atari_sac.py --task "MsPacmanNoFrameskip-v4"` |
136
+ | SeaquestNoFrameskip-v4 | 1772 | ![](results/discrete_sac/Seaquest_rew.png) | `python3 atari_sac.py --task "SeaquestNoFrameskip-v4"` |
137
+ | SpaceInvadersNoFrameskip-v4 | 649 | ![](results/discrete_sac/SpaceInvaders_rew.png) | `python3 atari_sac.py --task "SpaceInvadersNoFrameskip-v4"` |
examples/atari/__init__.py ADDED
File without changes
examples/atari/atari_c51.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import datetime
3
+ import os
4
+ import pprint
5
+ import sys
6
+
7
+ import numpy as np
8
+ import torch
9
+ from atari_network import C51
10
+ from atari_wrapper import make_atari_env
11
+
12
+ from tianshou.data import Collector, VectorReplayBuffer
13
+ from tianshou.highlevel.logger import LoggerFactoryDefault
14
+ from tianshou.policy import C51Policy
15
+ from tianshou.policy.base import BasePolicy
16
+ from tianshou.trainer import OffpolicyTrainer
17
+
18
+
19
def get_args() -> argparse.Namespace:
    """Parse the command-line options of the Atari C51 example.

    Returns:
        The namespace ``argparse`` builds from ``sys.argv``.
    """
    default_device = "cuda" if torch.cuda.is_available() else "cpu"
    # (flag, add_argument keyword arguments), registered in declaration order
    # so that the generated ``--help`` output keeps its layout.
    option_table: list[tuple[str, dict]] = [
        ("--task", {"type": str, "default": "PongNoFrameskip-v4"}),
        ("--seed", {"type": int, "default": 0}),
        ("--scale-obs", {"type": int, "default": 0}),
        ("--eps-test", {"type": float, "default": 0.005}),
        ("--eps-train", {"type": float, "default": 1.0}),
        ("--eps-train-final", {"type": float, "default": 0.05}),
        ("--buffer-size", {"type": int, "default": 100000}),
        ("--lr", {"type": float, "default": 0.0001}),
        ("--gamma", {"type": float, "default": 0.99}),
        ("--num-atoms", {"type": int, "default": 51}),
        ("--v-min", {"type": float, "default": -10.0}),
        ("--v-max", {"type": float, "default": 10.0}),
        ("--n-step", {"type": int, "default": 3}),
        ("--target-update-freq", {"type": int, "default": 500}),
        ("--epoch", {"type": int, "default": 100}),
        ("--step-per-epoch", {"type": int, "default": 100000}),
        ("--step-per-collect", {"type": int, "default": 10}),
        ("--update-per-step", {"type": float, "default": 0.1}),
        ("--batch-size", {"type": int, "default": 32}),
        ("--training-num", {"type": int, "default": 10}),
        ("--test-num", {"type": int, "default": 10}),
        ("--logdir", {"type": str, "default": "log"}),
        ("--render", {"type": float, "default": 0.0}),
        ("--device", {"type": str, "default": default_device}),
        ("--frames-stack", {"type": int, "default": 4}),
        ("--resume-path", {"type": str, "default": None}),
        ("--resume-id", {"type": str, "default": None}),
        (
            "--logger",
            {"type": str, "default": "tensorboard", "choices": ["tensorboard", "wandb"]},
        ),
        ("--wandb-project", {"type": str, "default": "atari.benchmark"}),
        (
            "--watch",
            {
                "default": False,
                "action": "store_true",
                "help": "watch the play of pre-trained policy only",
            },
        ),
        ("--save-buffer-name", {"type": str, "default": None}),
    ]
    cli = argparse.ArgumentParser()
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()
67
+
68
+
69
def test_c51(args: argparse.Namespace | None = None) -> None:
    """Train a C51 agent on an Atari task, or only watch a loaded one.

    Args:
        args: Options as produced by ``get_args``. When omitted, the command
            line is parsed at call time.
    """
    if args is None:
        # A ``get_args()`` default would be evaluated once when the function is
        # defined, i.e. on import, and then a second time by the ``__main__``
        # guard. Parsing lazily keeps importing this module free of side effects.
        args = get_args()

    env, train_envs, test_envs = make_atari_env(
        args.task,
        args.seed,
        args.training_num,
        args.test_num,
        scale=args.scale_obs,
        frame_stack=args.frames_stack,
    )
    args.state_shape = env.observation_space.shape or env.observation_space.n
    args.action_shape = env.action_space.shape or env.action_space.n
    # should be N_FRAMES x H x W
    print("Observations shape:", args.state_shape)
    print("Actions shape:", args.action_shape)
    # seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # define model
    net = C51(*args.state_shape, args.action_shape, args.num_atoms, args.device)
    optim = torch.optim.Adam(net.parameters(), lr=args.lr)
    # define policy
    policy: C51Policy = C51Policy(
        model=net,
        optim=optim,
        discount_factor=args.gamma,
        action_space=env.action_space,
        num_atoms=args.num_atoms,
        v_min=args.v_min,
        v_max=args.v_max,
        estimation_step=args.n_step,
        target_update_freq=args.target_update_freq,
    ).to(args.device)
    # load a previous policy
    if args.resume_path:
        policy.load_state_dict(torch.load(args.resume_path, map_location=args.device))
        print("Loaded agent from: ", args.resume_path)
    # replay buffer: `save_last_obs` and `stack_num` can be removed together
    # when you have enough RAM
    buffer = VectorReplayBuffer(
        args.buffer_size,
        buffer_num=len(train_envs),
        ignore_obs_next=True,
        save_only_last_obs=True,
        stack_num=args.frames_stack,
    )
    # collector
    train_collector = Collector(policy, train_envs, buffer, exploration_noise=True)
    test_collector = Collector(policy, test_envs, exploration_noise=True)

    # log: <logdir>/<task>/c51/<seed>/<timestamp>
    now = datetime.datetime.now().strftime("%y%m%d-%H%M%S")
    args.algo_name = "c51"
    log_name = os.path.join(args.task, args.algo_name, str(args.seed), now)
    log_path = os.path.join(args.logdir, log_name)

    # logger
    logger_factory = LoggerFactoryDefault()
    if args.logger == "wandb":
        logger_factory.logger_type = "wandb"
        logger_factory.wandb_project = args.wandb_project
    else:
        logger_factory.logger_type = "tensorboard"

    logger = logger_factory.create_logger(
        log_dir=log_path,
        experiment_name=log_name,
        run_id=args.resume_id,
        config_dict=vars(args),
    )

    def save_best_fn(policy: BasePolicy) -> None:
        """Store the policy weights as ``policy.pth`` in the log directory."""
        torch.save(policy.state_dict(), os.path.join(log_path, "policy.pth"))

    def stop_fn(mean_rewards: float) -> bool:
        """Tell the trainer whether the reward target has been reached."""
        if env.spec.reward_threshold:
            return mean_rewards >= env.spec.reward_threshold
        if "Pong" in args.task:
            return mean_rewards >= 20
        return False

    def train_fn(epoch: int, env_step: int) -> None:
        """Set the training epsilon for the current environment step."""
        # nature DQN setting, linear decay in the first 1M steps
        if env_step <= 1e6:
            eps = args.eps_train - env_step / 1e6 * (args.eps_train - args.eps_train_final)
        else:
            eps = args.eps_train_final
        policy.set_eps(eps)
        if env_step % 1000 == 0:
            logger.write("train/env_step", env_step, {"train/eps": eps})

    def test_fn(epoch: int, env_step: int | None) -> None:
        """Switch the policy to the evaluation epsilon."""
        policy.set_eps(args.eps_test)

    # watch agent's performance
    def watch() -> None:
        """Evaluate the policy, or fill and save a replay buffer instead."""
        print("Setup test envs ...")
        policy.set_eps(args.eps_test)
        test_envs.seed(args.seed)
        if args.save_buffer_name:
            print(f"Generate buffer with size {args.buffer_size}")
            buffer = VectorReplayBuffer(
                args.buffer_size,
                buffer_num=len(test_envs),
                ignore_obs_next=True,
                save_only_last_obs=True,
                stack_num=args.frames_stack,
            )
            collector = Collector(policy, test_envs, buffer, exploration_noise=True)
            result = collector.collect(n_step=args.buffer_size)
            print(f"Save buffer into {args.save_buffer_name}")
            # Unfortunately, pickle will cause oom with 1M buffer size
            buffer.save_hdf5(args.save_buffer_name)
        else:
            print("Testing agent ...")
            test_collector.reset()
            result = test_collector.collect(n_episode=args.test_num, render=args.render)
        result.pprint_asdict()

    if args.watch:
        watch()
        sys.exit(0)

    # test train_collector and start filling replay buffer
    train_collector.reset()
    train_collector.collect(n_step=args.batch_size * args.training_num)
    # trainer
    result = OffpolicyTrainer(
        policy=policy,
        train_collector=train_collector,
        test_collector=test_collector,
        max_epoch=args.epoch,
        step_per_epoch=args.step_per_epoch,
        step_per_collect=args.step_per_collect,
        episode_per_test=args.test_num,
        batch_size=args.batch_size,
        train_fn=train_fn,
        test_fn=test_fn,
        stop_fn=stop_fn,
        save_best_fn=save_best_fn,
        logger=logger,
        update_per_step=args.update_per_step,
        test_in_train=False,
    ).run()

    pprint.pprint(result)
    watch()
215
+
216
+
217
+ if __name__ == "__main__":
218
+ test_c51(get_args())
examples/atari/atari_dqn.py ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import datetime
3
+ import os
4
+ import pprint
5
+ import sys
6
+
7
+ import numpy as np
8
+ import torch
9
+ from atari_network import DQN
10
+ from atari_wrapper import make_atari_env
11
+
12
+ from tianshou.data import Collector, VectorReplayBuffer
13
+ from tianshou.highlevel.logger import LoggerFactoryDefault
14
+ from tianshou.policy import DQNPolicy
15
+ from tianshou.policy.base import BasePolicy
16
+ from tianshou.policy.modelbased.icm import ICMPolicy
17
+ from tianshou.trainer import OffpolicyTrainer
18
+ from tianshou.utils.net.discrete import IntrinsicCuriosityModule
19
+
20
+
21
def get_args() -> argparse.Namespace:
    """Parse the command-line options of the Atari DQN example.

    Returns:
        The namespace ``argparse`` builds from ``sys.argv``.
    """
    default_device = "cuda" if torch.cuda.is_available() else "cpu"
    # (flag, add_argument keyword arguments), registered in declaration order
    # so that the generated ``--help`` output keeps its layout.
    option_table: list[tuple[str, dict]] = [
        ("--task", {"type": str, "default": "PongNoFrameskip-v4"}),
        ("--seed", {"type": int, "default": 0}),
        ("--scale-obs", {"type": int, "default": 0}),
        ("--eps-test", {"type": float, "default": 0.005}),
        ("--eps-train", {"type": float, "default": 1.0}),
        ("--eps-train-final", {"type": float, "default": 0.05}),
        ("--buffer-size", {"type": int, "default": 100000}),
        ("--lr", {"type": float, "default": 0.0001}),
        ("--gamma", {"type": float, "default": 0.99}),
        ("--n-step", {"type": int, "default": 3}),
        ("--target-update-freq", {"type": int, "default": 500}),
        ("--epoch", {"type": int, "default": 100}),
        ("--step-per-epoch", {"type": int, "default": 100000}),
        ("--step-per-collect", {"type": int, "default": 10}),
        ("--update-per-step", {"type": float, "default": 0.1}),
        ("--batch-size", {"type": int, "default": 32}),
        ("--training-num", {"type": int, "default": 10}),
        ("--test-num", {"type": int, "default": 10}),
        ("--logdir", {"type": str, "default": "log"}),
        ("--render", {"type": float, "default": 0.0}),
        ("--device", {"type": str, "default": default_device}),
        ("--frames-stack", {"type": int, "default": 4}),
        ("--resume-path", {"type": str, "default": None}),
        ("--resume-id", {"type": str, "default": None}),
        (
            "--logger",
            {"type": str, "default": "tensorboard", "choices": ["tensorboard", "wandb"]},
        ),
        ("--wandb-project", {"type": str, "default": "atari.benchmark"}),
        (
            "--watch",
            {
                "default": False,
                "action": "store_true",
                "help": "watch the play of pre-trained policy only",
            },
        ),
        ("--save-buffer-name", {"type": str, "default": None}),
        (
            "--icm-lr-scale",
            {
                "type": float,
                "default": 0.0,
                "help": "use intrinsic curiosity module with this lr scale",
            },
        ),
        (
            "--icm-reward-scale",
            {
                "type": float,
                "default": 0.01,
                "help": "scaling factor for intrinsic curiosity reward",
            },
        ),
        (
            "--icm-forward-loss-weight",
            {
                "type": float,
                "default": 0.2,
                "help": "weight for the forward model loss in ICM",
            },
        ),
    ]
    cli = argparse.ArgumentParser()
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()
84
+
85
+
86
def main(args: argparse.Namespace | None = None) -> None:
    """Train a DQN agent (optionally wrapped in ICM) on an Atari task.

    With ``--watch`` the policy is only evaluated and the process exits.

    Args:
        args: Options as produced by ``get_args``. When omitted, the command
            line is parsed at call time.
    """
    if args is None:
        # A ``get_args()`` default would be evaluated once when the function is
        # defined, i.e. on import, and then a second time by the ``__main__``
        # guard. Parsing lazily keeps importing this module free of side effects.
        args = get_args()

    env, train_envs, test_envs = make_atari_env(
        args.task,
        args.seed,
        args.training_num,
        args.test_num,
        scale=args.scale_obs,
        frame_stack=args.frames_stack,
    )
    args.state_shape = env.observation_space.shape or env.observation_space.n
    args.action_shape = env.action_space.shape or env.action_space.n
    # should be N_FRAMES x H x W
    print("Observations shape:", args.state_shape)
    print("Actions shape:", args.action_shape)
    # seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # define model
    net = DQN(*args.state_shape, args.action_shape, args.device).to(args.device)
    optim = torch.optim.Adam(net.parameters(), lr=args.lr)
    # define policy
    policy: DQNPolicy | ICMPolicy
    policy = DQNPolicy(
        model=net,
        optim=optim,
        action_space=env.action_space,
        discount_factor=args.gamma,
        estimation_step=args.n_step,
        target_update_freq=args.target_update_freq,
    )
    if args.icm_lr_scale > 0:
        # Wrap the DQN policy in an intrinsic curiosity module that uses its
        # own feature extractor and optimizer.
        feature_net = DQN(*args.state_shape, args.action_shape, args.device, features_only=True)
        action_dim = np.prod(args.action_shape)
        feature_dim = feature_net.output_dim
        icm_net = IntrinsicCuriosityModule(
            feature_net.net,
            feature_dim,
            action_dim,
            hidden_sizes=[512],
            device=args.device,
        )
        icm_optim = torch.optim.Adam(icm_net.parameters(), lr=args.lr)
        policy = ICMPolicy(
            policy=policy,
            model=icm_net,
            optim=icm_optim,
            action_space=env.action_space,
            lr_scale=args.icm_lr_scale,
            reward_scale=args.icm_reward_scale,
            forward_loss_weight=args.icm_forward_loss_weight,
        ).to(args.device)
    # load a previous policy
    if args.resume_path:
        policy.load_state_dict(torch.load(args.resume_path, map_location=args.device))
        print("Loaded agent from: ", args.resume_path)
    # replay buffer: `save_last_obs` and `stack_num` can be removed together
    # when you have enough RAM
    buffer = VectorReplayBuffer(
        args.buffer_size,
        buffer_num=len(train_envs),
        ignore_obs_next=True,
        save_only_last_obs=True,
        stack_num=args.frames_stack,
    )
    # collector
    train_collector = Collector(policy, train_envs, buffer, exploration_noise=True)
    test_collector = Collector(policy, test_envs, exploration_noise=True)

    # log: <logdir>/<task>/<algo>/<seed>/<timestamp>
    now = datetime.datetime.now().strftime("%y%m%d-%H%M%S")
    args.algo_name = "dqn_icm" if args.icm_lr_scale > 0 else "dqn"
    log_name = os.path.join(args.task, args.algo_name, str(args.seed), now)
    log_path = os.path.join(args.logdir, log_name)

    # logger
    logger_factory = LoggerFactoryDefault()
    if args.logger == "wandb":
        logger_factory.logger_type = "wandb"
        logger_factory.wandb_project = args.wandb_project
    else:
        logger_factory.logger_type = "tensorboard"

    logger = logger_factory.create_logger(
        log_dir=log_path,
        experiment_name=log_name,
        run_id=args.resume_id,
        config_dict=vars(args),
    )

    def save_best_fn(policy: BasePolicy) -> None:
        """Store the policy weights as ``policy.pth`` in the log directory."""
        torch.save(policy.state_dict(), os.path.join(log_path, "policy.pth"))

    def stop_fn(mean_rewards: float) -> bool:
        """Tell the trainer whether the reward target has been reached."""
        if env.spec.reward_threshold:
            return mean_rewards >= env.spec.reward_threshold
        if "Pong" in args.task:
            return mean_rewards >= 20
        return False

    def train_fn(epoch: int, env_step: int) -> None:
        """Set the training epsilon for the current environment step."""
        # nature DQN setting, linear decay in the first 1M steps
        if env_step <= 1e6:
            eps = args.eps_train - env_step / 1e6 * (args.eps_train - args.eps_train_final)
        else:
            eps = args.eps_train_final
        policy.set_eps(eps)
        if env_step % 1000 == 0:
            logger.write("train/env_step", env_step, {"train/eps": eps})

    def test_fn(epoch: int, env_step: int | None) -> None:
        """Switch the policy to the evaluation epsilon."""
        policy.set_eps(args.eps_test)

    def save_checkpoint_fn(epoch: int, env_step: int, gradient_step: int) -> str:
        """Write a per-epoch checkpoint and return its path."""
        # see also: https://pytorch.org/tutorials/beginner/saving_loading_models.html
        ckpt_path = os.path.join(log_path, f"checkpoint_{epoch}.pth")
        # NOTE(review): the state dict is nested under "model", whereas
        # --resume-path above loads a bare state dict -- confirm these
        # checkpoints are not meant to be passed to --resume-path directly.
        torch.save({"model": policy.state_dict()}, ckpt_path)
        return ckpt_path

    # watch agent's performance
    def watch() -> None:
        """Evaluate the policy, or fill and save a replay buffer instead."""
        print("Setup test envs ...")
        policy.set_eps(args.eps_test)
        test_envs.seed(args.seed)
        if args.save_buffer_name:
            print(f"Generate buffer with size {args.buffer_size}")
            buffer = VectorReplayBuffer(
                args.buffer_size,
                buffer_num=len(test_envs),
                ignore_obs_next=True,
                save_only_last_obs=True,
                stack_num=args.frames_stack,
            )
            collector = Collector(policy, test_envs, buffer, exploration_noise=True)
            result = collector.collect(n_step=args.buffer_size)
            print(f"Save buffer into {args.save_buffer_name}")
            # Unfortunately, pickle will cause oom with 1M buffer size
            buffer.save_hdf5(args.save_buffer_name)
        else:
            print("Testing agent ...")
            test_collector.reset()
            result = test_collector.collect(n_episode=args.test_num, render=args.render)
        result.pprint_asdict()

    if args.watch:
        watch()
        sys.exit(0)

    # test train_collector and start filling replay buffer
    train_collector.reset()
    train_collector.collect(n_step=args.batch_size * args.training_num)
    # trainer
    result = OffpolicyTrainer(
        policy=policy,
        train_collector=train_collector,
        test_collector=test_collector,
        max_epoch=args.epoch,
        step_per_epoch=args.step_per_epoch,
        step_per_collect=args.step_per_collect,
        episode_per_test=args.test_num,
        batch_size=args.batch_size,
        train_fn=train_fn,
        test_fn=test_fn,
        stop_fn=stop_fn,
        save_best_fn=save_best_fn,
        logger=logger,
        update_per_step=args.update_per_step,
        test_in_train=False,
        resume_from_log=args.resume_id is not None,
        save_checkpoint_fn=save_checkpoint_fn,
    ).run()

    pprint.pprint(result)
    watch()
259
+
260
+
261
+ if __name__ == "__main__":
262
+ main(get_args())
examples/atari/atari_dqn_hl.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import os
4
+
5
+ from examples.atari.atari_network import (
6
+ IntermediateModuleFactoryAtariDQN,
7
+ IntermediateModuleFactoryAtariDQNFeatures,
8
+ )
9
+ from examples.atari.atari_wrapper import AtariEnvFactory, AtariEpochStopCallback
10
+ from tianshou.highlevel.config import SamplingConfig
11
+ from tianshou.highlevel.experiment import (
12
+ DQNExperimentBuilder,
13
+ ExperimentConfig,
14
+ )
15
+ from tianshou.highlevel.params.policy_params import DQNParams
16
+ from tianshou.highlevel.params.policy_wrapper import (
17
+ PolicyWrapperFactoryIntrinsicCuriosity,
18
+ )
19
+ from tianshou.highlevel.trainer import (
20
+ EpochTestCallbackDQNSetEps,
21
+ EpochTrainCallbackDQNEpsLinearDecay,
22
+ )
23
+ from tianshou.utils import logging
24
+ from tianshou.utils.logging import datetime_tag
25
+
26
+
27
def main(
    experiment_config: ExperimentConfig,
    task: str = "PongNoFrameskip-v4",
    scale_obs: bool = False,
    eps_test: float = 0.005,
    eps_train: float = 1.0,
    eps_train_final: float = 0.05,
    buffer_size: int = 100000,
    lr: float = 0.0001,
    gamma: float = 0.99,
    n_step: int = 3,
    target_update_freq: int = 500,
    epoch: int = 100,
    step_per_epoch: int = 100000,
    step_per_collect: int = 10,
    update_per_step: float = 0.1,
    batch_size: int = 32,
    training_num: int = 10,
    test_num: int = 10,
    frames_stack: int = 4,
    save_buffer_name: str | None = None,  # TODO support?
    icm_lr_scale: float = 0.0,
    icm_reward_scale: float = 0.01,
    icm_forward_loss_weight: float = 0.2,
) -> None:
    """Build and run the high-level DQN experiment on an Atari task.

    The intrinsic-curiosity policy wrapper is added only when
    ``icm_lr_scale`` is positive. ``save_buffer_name`` is accepted but not
    used by this function.
    """
    run_name = os.path.join(task, "dqn", str(experiment_config.seed), datetime_tag())

    sampling_config = SamplingConfig(
        num_epochs=epoch,
        step_per_epoch=step_per_epoch,
        batch_size=batch_size,
        num_train_envs=training_num,
        num_test_envs=test_num,
        buffer_size=buffer_size,
        step_per_collect=step_per_collect,
        update_per_step=update_per_step,
        repeat_per_collect=None,
        replay_buffer_stack_num=frames_stack,
        replay_buffer_ignore_obs_next=True,
        replay_buffer_save_only_last_obs=True,
    )

    env_factory = AtariEnvFactory(
        task,
        sampling_config.train_seed,
        sampling_config.test_seed,
        frames_stack,
        scale=scale_obs,
    )

    # Same fluent configuration as a single call chain, spelled out step by step.
    builder = DQNExperimentBuilder(env_factory, experiment_config, sampling_config)
    dqn_params = DQNParams(
        discount_factor=gamma,
        estimation_step=n_step,
        lr=lr,
        target_update_freq=target_update_freq,
    )
    builder = builder.with_dqn_params(dqn_params)
    builder = builder.with_model_factory(IntermediateModuleFactoryAtariDQN())
    eps_decay_callback = EpochTrainCallbackDQNEpsLinearDecay(eps_train, eps_train_final)
    builder = builder.with_epoch_train_callback(eps_decay_callback)
    builder = builder.with_epoch_test_callback(EpochTestCallbackDQNSetEps(eps_test))
    builder = builder.with_epoch_stop_callback(AtariEpochStopCallback(task))

    if icm_lr_scale > 0:
        curiosity_wrapper = PolicyWrapperFactoryIntrinsicCuriosity(
            feature_net_factory=IntermediateModuleFactoryAtariDQNFeatures(),
            hidden_sizes=[512],
            lr=lr,
            lr_scale=icm_lr_scale,
            reward_scale=icm_reward_scale,
            forward_loss_weight=icm_forward_loss_weight,
        )
        # The return value is deliberately not rebound here; the builder is
        # relied upon to register the wrapper on itself.
        builder.with_policy_wrapper_factory(curiosity_wrapper)

    builder.build().run(run_name=run_name)
108
+
109
+
110
+ if __name__ == "__main__":
111
+ logging.run_cli(main)
examples/atari/atari_fqf.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import datetime
3
+ import os
4
+ import pprint
5
+ import sys
6
+
7
+ import numpy as np
8
+ import torch
9
+ from atari_network import DQN
10
+ from atari_wrapper import make_atari_env
11
+
12
+ from tianshou.data import Collector, VectorReplayBuffer
13
+ from tianshou.highlevel.logger import LoggerFactoryDefault
14
+ from tianshou.policy import FQFPolicy
15
+ from tianshou.policy.base import BasePolicy
16
+ from tianshou.trainer import OffpolicyTrainer
17
+ from tianshou.utils.net.discrete import FractionProposalNetwork, FullQuantileFunction
18
+
19
+
20
def get_args() -> argparse.Namespace:
    """Parse the command-line options of the Atari FQF example.

    Returns:
        The namespace ``argparse`` builds from ``sys.argv``.
    """
    default_device = "cuda" if torch.cuda.is_available() else "cpu"
    # (flag, add_argument keyword arguments), registered in declaration order
    # so that the generated ``--help`` output keeps its layout.
    option_table: list[tuple[str, dict]] = [
        ("--task", {"type": str, "default": "PongNoFrameskip-v4"}),
        ("--seed", {"type": int, "default": 3128}),
        ("--scale-obs", {"type": int, "default": 0}),
        ("--eps-test", {"type": float, "default": 0.005}),
        ("--eps-train", {"type": float, "default": 1.0}),
        ("--eps-train-final", {"type": float, "default": 0.05}),
        ("--buffer-size", {"type": int, "default": 100000}),
        ("--lr", {"type": float, "default": 5e-5}),
        ("--fraction-lr", {"type": float, "default": 2.5e-9}),
        ("--gamma", {"type": float, "default": 0.99}),
        ("--num-fractions", {"type": int, "default": 32}),
        ("--num-cosines", {"type": int, "default": 64}),
        ("--ent-coef", {"type": float, "default": 10.0}),
        ("--hidden-sizes", {"type": int, "nargs": "*", "default": [512]}),
        ("--n-step", {"type": int, "default": 3}),
        ("--target-update-freq", {"type": int, "default": 500}),
        ("--epoch", {"type": int, "default": 100}),
        ("--step-per-epoch", {"type": int, "default": 100000}),
        ("--step-per-collect", {"type": int, "default": 10}),
        ("--update-per-step", {"type": float, "default": 0.1}),
        ("--batch-size", {"type": int, "default": 32}),
        ("--training-num", {"type": int, "default": 10}),
        ("--test-num", {"type": int, "default": 10}),
        ("--logdir", {"type": str, "default": "log"}),
        ("--render", {"type": float, "default": 0.0}),
        ("--device", {"type": str, "default": default_device}),
        ("--frames-stack", {"type": int, "default": 4}),
        ("--resume-path", {"type": str, "default": None}),
        ("--resume-id", {"type": str, "default": None}),
        (
            "--logger",
            {"type": str, "default": "tensorboard", "choices": ["tensorboard", "wandb"]},
        ),
        ("--wandb-project", {"type": str, "default": "atari.benchmark"}),
        (
            "--watch",
            {
                "default": False,
                "action": "store_true",
                "help": "watch the play of pre-trained policy only",
            },
        ),
        ("--save-buffer-name", {"type": str, "default": None}),
    ]
    cli = argparse.ArgumentParser()
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()
70
+
71
+
72
def test_fqf(args: argparse.Namespace | None = None) -> None:
    """Train an FQF agent on an Atari task, or only watch a pre-trained one.

    The function builds the environments, the quantile/fraction networks, the
    policy, the replay buffer, the collectors and the logger, then either runs
    ``watch()`` and exits (``--watch``) or trains with ``OffpolicyTrainer`` and
    watches the result afterwards.

    :param args: options as produced by :func:`get_args`. When omitted, the
        command line is parsed at call time. The namespace is extended in
        place with ``state_shape``, ``action_shape`` and ``algo_name``.
    """
    # Resolve the default lazily: a ``get_args()`` default would be evaluated
    # once at definition time, i.e. importing this module would already parse
    # sys.argv and all default calls would share one mutated namespace.
    if args is None:
        args = get_args()
    env, train_envs, test_envs = make_atari_env(
        args.task,
        args.seed,
        args.training_num,
        args.test_num,
        scale=args.scale_obs,
        frame_stack=args.frames_stack,
    )
    args.state_shape = env.observation_space.shape or env.observation_space.n
    args.action_shape = env.action_space.shape or env.action_space.n
    # should be N_FRAMES x H x W
    print("Observations shape:", args.state_shape)
    print("Actions shape:", args.action_shape)
    # seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # define model
    feature_net = DQN(*args.state_shape, args.action_shape, args.device, features_only=True)
    net = FullQuantileFunction(
        feature_net,
        args.action_shape,
        args.hidden_sizes,
        args.num_cosines,
        device=args.device,
    ).to(args.device)
    optim = torch.optim.Adam(net.parameters(), lr=args.lr)
    fraction_net = FractionProposalNetwork(args.num_fractions, net.input_dim)
    fraction_optim = torch.optim.RMSprop(fraction_net.parameters(), lr=args.fraction_lr)
    # define policy
    policy: FQFPolicy = FQFPolicy(
        model=net,
        optim=optim,
        fraction_model=fraction_net,
        fraction_optim=fraction_optim,
        action_space=env.action_space,
        discount_factor=args.gamma,
        num_fractions=args.num_fractions,
        ent_coef=args.ent_coef,
        estimation_step=args.n_step,
        target_update_freq=args.target_update_freq,
    ).to(args.device)
    # load a previous policy
    if args.resume_path:
        policy.load_state_dict(torch.load(args.resume_path, map_location=args.device))
        print("Loaded agent from: ", args.resume_path)
    # replay buffer: `save_only_last_obs` and `stack_num` can be removed together
    # when you have enough RAM
    buffer = VectorReplayBuffer(
        args.buffer_size,
        buffer_num=len(train_envs),
        ignore_obs_next=True,
        save_only_last_obs=True,
        stack_num=args.frames_stack,
    )
    # collector
    train_collector = Collector(policy, train_envs, buffer, exploration_noise=True)
    test_collector = Collector(policy, test_envs, exploration_noise=True)

    # log: <logdir>/<task>/fqf/<seed>/<timestamp>
    now = datetime.datetime.now().strftime("%y%m%d-%H%M%S")
    args.algo_name = "fqf"
    log_name = os.path.join(args.task, args.algo_name, str(args.seed), now)
    log_path = os.path.join(args.logdir, log_name)

    # logger
    logger_factory = LoggerFactoryDefault()
    if args.logger == "wandb":
        logger_factory.logger_type = "wandb"
        logger_factory.wandb_project = args.wandb_project
    else:
        logger_factory.logger_type = "tensorboard"

    logger = logger_factory.create_logger(
        log_dir=log_path,
        experiment_name=log_name,
        run_id=args.resume_id,
        config_dict=vars(args),
    )

    def save_best_fn(policy: BasePolicy) -> None:
        """Store the weights of the best policy so far under the log directory."""
        torch.save(policy.state_dict(), os.path.join(log_path, "policy.pth"))

    def stop_fn(mean_rewards: float) -> bool:
        """Stop at the env's reward threshold, or at a mean reward of 20 on Pong."""
        if env.spec.reward_threshold:
            return mean_rewards >= env.spec.reward_threshold
        if "Pong" in args.task:
            return mean_rewards >= 20
        return False

    def train_fn(epoch: int, env_step: int) -> None:
        """Anneal the exploration epsilon and log it every 1000 env steps."""
        # nature DQN setting, linear decay in the first 1M steps
        if env_step <= 1e6:
            eps = args.eps_train - env_step / 1e6 * (args.eps_train - args.eps_train_final)
        else:
            eps = args.eps_train_final
        policy.set_eps(eps)
        if env_step % 1000 == 0:
            logger.write("train/env_step", env_step, {"train/eps": eps})

    def test_fn(epoch: int, env_step: int | None) -> None:
        """Switch the policy to the evaluation epsilon."""
        policy.set_eps(args.eps_test)

    # watch agent's performance
    def watch() -> None:
        """Evaluate the policy, or fill and save a buffer if --save-buffer-name is set."""
        print("Setup test envs ...")
        policy.set_eps(args.eps_test)
        test_envs.seed(args.seed)
        if args.save_buffer_name:
            print(f"Generate buffer with size {args.buffer_size}")
            # Separate buffer sized for the test envs; the training buffer is untouched.
            watch_buffer = VectorReplayBuffer(
                args.buffer_size,
                buffer_num=len(test_envs),
                ignore_obs_next=True,
                save_only_last_obs=True,
                stack_num=args.frames_stack,
            )
            collector = Collector(policy, test_envs, watch_buffer, exploration_noise=True)
            result = collector.collect(n_step=args.buffer_size)
            print(f"Save buffer into {args.save_buffer_name}")
            # Unfortunately, pickle will cause oom with 1M buffer size
            watch_buffer.save_hdf5(args.save_buffer_name)
        else:
            print("Testing agent ...")
            test_collector.reset()
            result = test_collector.collect(n_episode=args.test_num, render=args.render)
        result.pprint_asdict()

    if args.watch:
        watch()
        sys.exit(0)

    # test train_collector and start filling replay buffer
    train_collector.reset()
    train_collector.collect(n_step=args.batch_size * args.training_num)
    # trainer
    result = OffpolicyTrainer(
        policy=policy,
        train_collector=train_collector,
        test_collector=test_collector,
        max_epoch=args.epoch,
        step_per_epoch=args.step_per_epoch,
        step_per_collect=args.step_per_collect,
        episode_per_test=args.test_num,
        batch_size=args.batch_size,
        train_fn=train_fn,
        test_fn=test_fn,
        stop_fn=stop_fn,
        save_best_fn=save_best_fn,
        logger=logger,
        update_per_step=args.update_per_step,
        test_in_train=False,
    ).run()

    pprint.pprint(result)
    watch()
228
+
229
+
230
# Script entry point: parse the command-line options and run the FQF experiment.
if __name__ == "__main__":
    test_fqf(get_args())
examples/atari/atari_fqf_rainbow.py ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import datetime
3
+ import os
4
+ import pprint
5
+ import sys
6
+
7
+ # import numpy as np
8
+ import torch
9
+ from atari_network import DQN
10
+ from atari_wrapper import make_atari_env
11
+
12
+ from tianshou.data import Collector, PrioritizedVectorReplayBuffer, VectorReplayBuffer
13
+ from tianshou.highlevel.logger import LoggerFactoryDefault
14
+ from tianshou.policy import FQFPolicy,FQF_RainbowPolicy
15
+ from tianshou.policy.base import BasePolicy
16
+ from tianshou.trainer import OffpolicyTrainer
17
+ from tianshou.utils.net.discrete import FractionProposalNetwork, FullQuantileFunction, FullQuantileFunctionRainbow
18
+
19
+
20
def _str2bool(value: str) -> bool:
    """Convert a command-line token such as ``true``, ``0`` or ``no`` to a bool.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so any
    non-empty token (including ``"False"``) becomes ``True``; this converter
    interprets the text instead.

    :raises argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    token = value.strip().lower()
    if token in {"1", "true", "t", "yes", "y", "on"}:
        return True
    # The empty string stays falsy, as it was with ``bool("")``.
    if token in {"0", "false", "f", "no", "n", "off", ""}:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def get_args() -> argparse.Namespace:
    """Parse the command-line options of the Rainbow-FQF Atari example.

    :return: the namespace parsed from ``sys.argv``; one attribute per flag,
        with dashes replaced by underscores.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--task", type=str, default="PongNoFrameskip-v4")
    parser.add_argument("--algo-name", type=str, default="RainbowFQF")
    parser.add_argument("--seed", type=int, default=3128)
    parser.add_argument("--scale-obs", type=int, default=0)
    parser.add_argument("--eps-test", type=float, default=0.005)
    parser.add_argument("--eps-train", type=float, default=1.0)
    parser.add_argument("--eps-train-final", type=float, default=0.05)
    parser.add_argument("--buffer-size", type=int, default=100000)
    parser.add_argument("--lr", type=float, default=5e-5)
    parser.add_argument("--fraction-lr", type=float, default=2.5e-9)
    parser.add_argument("--gamma", type=float, default=0.99)
    parser.add_argument("--num-fractions", type=int, default=32)
    parser.add_argument("--num-cosines", type=int, default=64)
    parser.add_argument("--ent-coef", type=float, default=10.0)
    parser.add_argument("--hidden-sizes", type=int, nargs="*", default=[512])
    parser.add_argument("--n-step", type=int, default=3)
    parser.add_argument("--target-update-freq", type=int, default=500)
    parser.add_argument("--epoch", type=int, default=100)
    parser.add_argument("--step-per-epoch", type=int, default=100000)
    parser.add_argument("--step-per-collect", type=int, default=10)
    parser.add_argument("--update-per-step", type=float, default=0.1)
    parser.add_argument("--batch-size", type=int, default=32)
    parser.add_argument("--training-num", type=int, default=10)
    parser.add_argument("--test-num", type=int, default=10)
    parser.add_argument("--logdir", type=str, default="log")
    # rainbow elements: each component is enabled unless its --no-* flag is given
    parser.add_argument("--no-dueling", action="store_true", default=False)
    parser.add_argument("--no-noisy", action="store_true", default=False)
    parser.add_argument("--no-priority", action="store_true", default=False)
    parser.add_argument("--noisy-std", type=float, default=0.1)
    parser.add_argument("--alpha", type=float, default=0.5)
    parser.add_argument("--beta", type=float, default=0.4)
    parser.add_argument("--beta-final", type=float, default=1.0)
    parser.add_argument("--beta-anneal-step", type=int, default=5000000)
    parser.add_argument("--no-weight-norm", action="store_true", default=False)

    parser.add_argument("--render", type=float, default=0.0)
    parser.add_argument(
        "--device",
        type=str,
        default="cuda" if torch.cuda.is_available() else "cpu",
    )
    parser.add_argument("--frames-stack", type=int, default=4)
    parser.add_argument("--resume-path", type=str, default=None)
    parser.add_argument("--resume-id", type=str, default=None)
    parser.add_argument(
        "--logger",
        type=str,
        default="tensorboard",
        choices=["tensorboard", "wandb"],
    )
    parser.add_argument("--wandb-project", type=str, default="atari.benchmark")
    parser.add_argument(
        "--watch",
        default=False,
        action="store_true",
        help="watch the play of pre-trained policy only",
    )
    parser.add_argument("--save-buffer-name", type=str, default=None)
    # Still takes a value (``--per true``), but the text is now interpreted
    # rather than passed to ``bool()``, so ``--per False`` really yields False.
    parser.add_argument("--per", type=_str2bool, default=False)
    return parser.parse_args()
84
+
85
+
86
def test_fqf(args: argparse.Namespace | None = None) -> None:
    """Train a Rainbow-style FQF agent on an Atari task, or only watch one.

    The function builds the environments, the networks, the policy, the
    (optionally prioritized) replay buffer, the collectors and the logger,
    then either runs ``watch()`` and exits (``--watch``) or trains with
    ``OffpolicyTrainer`` and watches the result afterwards.

    :param args: options as produced by :func:`get_args`. When omitted, the
        command line is parsed at call time. The namespace is extended in
        place with ``state_shape`` and ``action_shape``.
    """
    # Resolve the default lazily: a ``get_args()`` default would be evaluated
    # once at definition time, i.e. importing this module would already parse
    # sys.argv and all default calls would share one mutated namespace.
    if args is None:
        args = get_args()
    env, train_envs, test_envs = make_atari_env(
        args.task,
        args.seed,
        args.training_num,
        args.test_num,
        scale=args.scale_obs,
        frame_stack=args.frames_stack,
    )
    args.state_shape = env.observation_space.shape or env.observation_space.n
    args.action_shape = env.action_space.shape or env.action_space.n
    # should be N_FRAMES x H x W
    print("Observations shape:", args.state_shape)
    print("Actions shape:", args.action_shape)
    # seed
    # NOTE(review): only the torch RNG is seeded in this script; the NumPy
    # seeding used by the sibling examples is disabled here - confirm intent.
    torch.manual_seed(args.seed)
    # define model
    feature_net = DQN(*args.state_shape, args.action_shape, args.device, features_only=True)
    preprocess_net_output_dim = feature_net.output_dim
    print(preprocess_net_output_dim)
    net = FullQuantileFunctionRainbow(
        preprocess_net=feature_net,
        action_shape=args.action_shape,
        hidden_sizes=args.hidden_sizes,
        num_cosines=args.num_cosines,
        preprocess_net_output_dim=preprocess_net_output_dim,
        device=args.device,
        noisy_std=args.noisy_std,
        is_noisy=not args.no_noisy,  # enabled unless --no-noisy is given
        is_dueling=not args.no_dueling,  # enabled unless --no-dueling is given
    ).to(args.device)
    print(net)
    optim = torch.optim.Adam(net.parameters(), lr=args.lr)
    fraction_net = FractionProposalNetwork(args.num_fractions, net.input_dim)
    fraction_optim = torch.optim.RMSprop(fraction_net.parameters(), lr=args.fraction_lr)
    # define policy
    policy: FQF_RainbowPolicy = FQF_RainbowPolicy(
        model=net,
        optim=optim,
        fraction_model=fraction_net,
        fraction_optim=fraction_optim,
        action_space=env.action_space,
        discount_factor=args.gamma,
        num_fractions=args.num_fractions,
        ent_coef=args.ent_coef,
        estimation_step=args.n_step,
        target_update_freq=args.target_update_freq,
        is_noisy=not args.no_noisy,
    ).to(args.device)
    # load a previous policy
    if args.resume_path:
        policy.load_state_dict(torch.load(args.resume_path, map_location=args.device))
        print("Loaded agent from: ", args.resume_path)

    use_per = not args.no_priority

    def make_buffer(num_envs: int) -> VectorReplayBuffer | PrioritizedVectorReplayBuffer:
        """Build the replay buffer for ``num_envs`` environments.

        A prioritized buffer is returned unless ``--no-priority`` was given, so
        the training buffer and the one filled by ``watch()`` always agree.
        """
        # `save_only_last_obs` and `stack_num` can be removed together
        # when you have enough RAM
        common = {
            "buffer_num": num_envs,
            "ignore_obs_next": True,
            "save_only_last_obs": True,
            "stack_num": args.frames_stack,
        }
        if not use_per:
            return VectorReplayBuffer(args.buffer_size, **common)
        return PrioritizedVectorReplayBuffer(
            args.buffer_size,
            **common,
            alpha=args.alpha,
            beta=args.beta,
            weight_norm=not args.no_weight_norm,
        )

    # replay buffer
    if use_per:
        print("Using PER")
    buffer = make_buffer(len(train_envs))
    if use_per:
        print("PER as buffer")
    # collector
    train_collector = Collector(policy, train_envs, buffer, exploration_noise=True)
    test_collector = Collector(policy, test_envs, exploration_noise=True)

    # log: <logdir>/<task>/<algo-name>/<seed>/<timestamp>
    now = datetime.datetime.now().strftime("%y%m%d-%H%M%S")
    log_name = os.path.join(args.task, args.algo_name, str(args.seed), now)
    log_path = os.path.join(args.logdir, log_name)

    # logger
    logger_factory = LoggerFactoryDefault()
    if args.logger == "wandb":
        logger_factory.logger_type = "wandb"
        logger_factory.wandb_project = args.wandb_project
    else:
        logger_factory.logger_type = "tensorboard"

    logger = logger_factory.create_logger(
        log_dir=log_path,
        experiment_name=log_name,
        run_id=args.resume_id,
        config_dict=vars(args),
    )

    def save_best_fn(policy: BasePolicy) -> None:
        """Store the weights of the best policy so far under the log directory."""
        torch.save(policy.state_dict(), os.path.join(log_path, "policy.pth"))

    def stop_fn(mean_rewards: float) -> bool:
        """Stop at the env's reward threshold, or at a mean reward of 20 on Pong."""
        if env.spec.reward_threshold:
            return mean_rewards >= env.spec.reward_threshold
        if "Pong" in args.task:
            return mean_rewards >= 20
        return False

    def train_fn(epoch: int, env_step: int) -> None:
        """Anneal epsilon (and the PER beta, if used); log both every 1000 env steps."""
        # nature DQN setting, linear decay in the first 1M steps
        if env_step <= 1e6:
            eps = args.eps_train - env_step / 1e6 * (args.eps_train - args.eps_train_final)
        else:
            eps = args.eps_train_final
        policy.set_eps(eps)
        if env_step % 1000 == 0:
            logger.write("train/env_step", env_step, {"train/eps": eps})
        if use_per:
            # linear interpolation from --beta to --beta-final over --beta-anneal-step steps
            if env_step <= args.beta_anneal_step:
                beta = args.beta - env_step / args.beta_anneal_step * (args.beta - args.beta_final)
            else:
                beta = args.beta_final
            buffer.set_beta(beta)
            if env_step % 1000 == 0:
                logger.write("train/env_step", env_step, {"train/beta": beta})

    def test_fn(epoch: int, env_step: int | None) -> None:
        """Switch the policy to the evaluation epsilon."""
        policy.set_eps(args.eps_test)

    # watch agent's performance
    def watch() -> None:
        """Evaluate the policy, or fill and save a buffer if --save-buffer-name is set."""
        print("Setup test envs ...")
        policy.eval()
        policy.set_eps(args.eps_test)
        test_envs.seed(args.seed)
        if args.save_buffer_name:
            print(f"Generate buffer with size {args.buffer_size}")
            # Same buffer type as used for training, sized for the test envs.
            watch_buffer = make_buffer(len(test_envs))
            collector = Collector(policy, test_envs, watch_buffer, exploration_noise=True)
            result = collector.collect(n_step=args.buffer_size)
            print(f"Save buffer into {args.save_buffer_name}")
            # Unfortunately, pickle will cause oom with 1M buffer size
            watch_buffer.save_hdf5(args.save_buffer_name)
        else:
            print("Testing agent ...")
            test_collector.reset()
            result = test_collector.collect(n_episode=args.test_num, render=args.render)
        result.pprint_asdict()

    if args.watch:
        watch()
        sys.exit(0)

    # test train_collector and start filling replay buffer
    train_collector.reset()
    train_collector.collect(n_step=args.batch_size * args.training_num)
    # trainer
    result = OffpolicyTrainer(
        policy=policy,
        train_collector=train_collector,
        test_collector=test_collector,
        max_epoch=args.epoch,
        step_per_epoch=args.step_per_epoch,
        step_per_collect=args.step_per_collect,
        episode_per_test=args.test_num,
        batch_size=args.batch_size,
        train_fn=train_fn,
        test_fn=test_fn,
        stop_fn=stop_fn,
        save_best_fn=save_best_fn,
        logger=logger,
        update_per_step=args.update_per_step,
        test_in_train=False,
    ).run()

    pprint.pprint(result)
    watch()
285
+
286
+
287
# Script entry point: parse the command-line options and run the Rainbow-FQF experiment.
if __name__ == "__main__":
    test_fqf(get_args())
examples/atari/atari_iqn.py ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import datetime
3
+ import os
4
+ import pprint
5
+ import sys
6
+
7
+ import numpy as np
8
+ import torch
9
+ from atari_network import DQN
10
+ from atari_wrapper import make_atari_env
11
+
12
+ from tianshou.data import Collector, VectorReplayBuffer
13
+ from tianshou.highlevel.logger import LoggerFactoryDefault
14
+ from tianshou.policy import IQNPolicy
15
+ from tianshou.policy.base import BasePolicy
16
+ from tianshou.trainer import OffpolicyTrainer
17
+ from tianshou.utils.net.discrete import ImplicitQuantileNetwork
18
+
19
+
20
def get_args() -> argparse.Namespace:
    """Parse the command-line options of the IQN Atari example.

    :return: the namespace parsed from ``sys.argv``; one attribute per flag,
        with dashes replaced by underscores.
    """
    cli = argparse.ArgumentParser()
    add = cli.add_argument

    # task and exploration schedule
    add("--task", type=str, default="PongNoFrameskip-v4")
    add("--seed", type=int, default=1234)
    add("--scale-obs", type=int, default=0)
    add("--eps-test", type=float, default=0.005)
    add("--eps-train", type=float, default=1.0)
    add("--eps-train-final", type=float, default=0.05)

    # algorithm and network settings
    add("--buffer-size", type=int, default=100000)
    add("--lr", type=float, default=0.0001)
    add("--gamma", type=float, default=0.99)
    add("--sample-size", type=int, default=32)
    add("--online-sample-size", type=int, default=8)
    add("--target-sample-size", type=int, default=8)
    add("--num-cosines", type=int, default=64)
    add("--hidden-sizes", type=int, nargs="*", default=[512])
    add("--n-step", type=int, default=3)
    add("--target-update-freq", type=int, default=500)

    # training loop
    add("--epoch", type=int, default=100)
    add("--step-per-epoch", type=int, default=100000)
    add("--step-per-collect", type=int, default=10)
    add("--update-per-step", type=float, default=0.1)
    add("--batch-size", type=int, default=32)
    add("--training-num", type=int, default=10)
    add("--test-num", type=int, default=10)

    # runtime, logging and resuming
    add("--logdir", type=str, default="log")
    add("--render", type=float, default=0.0)
    fallback_device = "cuda" if torch.cuda.is_available() else "cpu"
    add("--device", type=str, default=fallback_device)
    add("--frames-stack", type=int, default=4)
    add("--resume-path", type=str, default=None)
    add("--resume-id", type=str, default=None)
    add("--logger", type=str, default="tensorboard", choices=["tensorboard", "wandb"])
    add("--wandb-project", type=str, default="atari.benchmark")
    add(
        "--watch",
        default=False,
        action="store_true",
        help="watch the play of pre-trained policy only",
    )
    add("--save-buffer-name", type=str, default=None)

    parsed = cli.parse_args()
    return parsed
70
+
71
+
72
def test_iqn(args: argparse.Namespace | None = None) -> None:
    """Train an IQN agent on an Atari task, or only watch a pre-trained one.

    The function builds the environments, the implicit quantile network, the
    policy, the replay buffer, the collectors and the logger, then either runs
    ``watch()`` and exits (``--watch``) or trains with ``OffpolicyTrainer`` and
    watches the result afterwards.

    :param args: options as produced by :func:`get_args`. When omitted, the
        command line is parsed at call time. The namespace is extended in
        place with ``state_shape``, ``action_shape`` and ``algo_name``.
    """
    # Resolve the default lazily: a ``get_args()`` default would be evaluated
    # once at definition time, i.e. importing this module would already parse
    # sys.argv and all default calls would share one mutated namespace.
    if args is None:
        args = get_args()
    env, train_envs, test_envs = make_atari_env(
        args.task,
        args.seed,
        args.training_num,
        args.test_num,
        scale=args.scale_obs,
        frame_stack=args.frames_stack,
    )
    args.state_shape = env.observation_space.shape or env.observation_space.n
    args.action_shape = env.action_space.shape or env.action_space.n
    # should be N_FRAMES x H x W
    print("Observations shape:", args.state_shape)
    print("Actions shape:", args.action_shape)
    # seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # define model
    feature_net = DQN(*args.state_shape, args.action_shape, args.device, features_only=True)
    net = ImplicitQuantileNetwork(
        feature_net,
        args.action_shape,
        args.hidden_sizes,
        num_cosines=args.num_cosines,
        device=args.device,
    ).to(args.device)
    optim = torch.optim.Adam(net.parameters(), lr=args.lr)
    # define policy
    policy: IQNPolicy = IQNPolicy(
        model=net,
        optim=optim,
        action_space=env.action_space,
        discount_factor=args.gamma,
        sample_size=args.sample_size,
        online_sample_size=args.online_sample_size,
        target_sample_size=args.target_sample_size,
        estimation_step=args.n_step,
        target_update_freq=args.target_update_freq,
    ).to(args.device)
    # load a previous policy
    if args.resume_path:
        policy.load_state_dict(torch.load(args.resume_path, map_location=args.device))
        print("Loaded agent from: ", args.resume_path)
    # replay buffer: `save_only_last_obs` and `stack_num` can be removed together
    # when you have enough RAM
    buffer = VectorReplayBuffer(
        args.buffer_size,
        buffer_num=len(train_envs),
        ignore_obs_next=True,
        save_only_last_obs=True,
        stack_num=args.frames_stack,
    )
    # collector
    train_collector = Collector(policy, train_envs, buffer, exploration_noise=True)
    test_collector = Collector(policy, test_envs, exploration_noise=True)

    # log: <logdir>/<task>/iqn/<seed>/<timestamp>
    now = datetime.datetime.now().strftime("%y%m%d-%H%M%S")
    args.algo_name = "iqn"
    log_name = os.path.join(args.task, args.algo_name, str(args.seed), now)
    log_path = os.path.join(args.logdir, log_name)

    # logger
    logger_factory = LoggerFactoryDefault()
    if args.logger == "wandb":
        logger_factory.logger_type = "wandb"
        logger_factory.wandb_project = args.wandb_project
    else:
        logger_factory.logger_type = "tensorboard"

    logger = logger_factory.create_logger(
        log_dir=log_path,
        experiment_name=log_name,
        run_id=args.resume_id,
        config_dict=vars(args),
    )

    def save_best_fn(policy: BasePolicy) -> None:
        """Store the weights of the best policy so far under the log directory."""
        torch.save(policy.state_dict(), os.path.join(log_path, "policy.pth"))

    def stop_fn(mean_rewards: float) -> bool:
        """Stop at the env's reward threshold, or at a mean reward of 20 on Pong."""
        if env.spec.reward_threshold:
            return mean_rewards >= env.spec.reward_threshold
        if "Pong" in args.task:
            return mean_rewards >= 20
        return False

    def train_fn(epoch: int, env_step: int) -> None:
        """Anneal the exploration epsilon and log it every 1000 env steps."""
        # nature DQN setting, linear decay in the first 1M steps
        if env_step <= 1e6:
            eps = args.eps_train - env_step / 1e6 * (args.eps_train - args.eps_train_final)
        else:
            eps = args.eps_train_final
        policy.set_eps(eps)
        if env_step % 1000 == 0:
            logger.write("train/env_step", env_step, {"train/eps": eps})

    def test_fn(epoch: int, env_step: int | None) -> None:
        """Switch the policy to the evaluation epsilon."""
        policy.set_eps(args.eps_test)

    # watch agent's performance
    def watch() -> None:
        """Evaluate the policy, or fill and save a buffer if --save-buffer-name is set."""
        print("Setup test envs ...")
        policy.set_eps(args.eps_test)
        test_envs.seed(args.seed)
        if args.save_buffer_name:
            print(f"Generate buffer with size {args.buffer_size}")
            # Separate buffer sized for the test envs; the training buffer is untouched.
            watch_buffer = VectorReplayBuffer(
                args.buffer_size,
                buffer_num=len(test_envs),
                ignore_obs_next=True,
                save_only_last_obs=True,
                stack_num=args.frames_stack,
            )
            collector = Collector(policy, test_envs, watch_buffer, exploration_noise=True)
            result = collector.collect(n_step=args.buffer_size)
            print(f"Save buffer into {args.save_buffer_name}")
            # Unfortunately, pickle will cause oom with 1M buffer size
            watch_buffer.save_hdf5(args.save_buffer_name)
        else:
            print("Testing agent ...")
            test_collector.reset()
            result = test_collector.collect(n_episode=args.test_num, render=args.render)
        result.pprint_asdict()

    if args.watch:
        watch()
        sys.exit(0)

    # test train_collector and start filling replay buffer
    train_collector.reset()
    train_collector.collect(n_step=args.batch_size * args.training_num)
    # trainer

    result = OffpolicyTrainer(
        policy=policy,
        train_collector=train_collector,
        test_collector=test_collector,
        max_epoch=args.epoch,
        step_per_epoch=args.step_per_epoch,
        step_per_collect=args.step_per_collect,
        episode_per_test=args.test_num,
        batch_size=args.batch_size,
        train_fn=train_fn,
        test_fn=test_fn,
        stop_fn=stop_fn,
        save_best_fn=save_best_fn,
        logger=logger,
        update_per_step=args.update_per_step,
        test_in_train=False,
    ).run()

    pprint.pprint(result)
    watch()
226
+
227
+
228
# Script entry point: parse the command-line options and run the IQN experiment.
if __name__ == "__main__":
    test_iqn(get_args())
examples/atari/atari_iqn_hl.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import os
4
+ from collections.abc import Sequence
5
+
6
+ from examples.atari.atari_network import (
7
+ IntermediateModuleFactoryAtariDQN,
8
+ )
9
+ from examples.atari.atari_wrapper import AtariEnvFactory, AtariEpochStopCallback
10
+ from tianshou.highlevel.config import SamplingConfig
11
+ from tianshou.highlevel.experiment import (
12
+ ExperimentConfig,
13
+ IQNExperimentBuilder,
14
+ )
15
+ from tianshou.highlevel.params.policy_params import IQNParams
16
+ from tianshou.highlevel.trainer import (
17
+ EpochTestCallbackDQNSetEps,
18
+ EpochTrainCallbackDQNEpsLinearDecay,
19
+ )
20
+ from tianshou.utils import logging
21
+ from tianshou.utils.logging import datetime_tag
22
+
23
+
24
def main(
    experiment_config: ExperimentConfig,
    task: str = "PongNoFrameskip-v4",
    scale_obs: bool = False,
    eps_test: float = 0.005,
    eps_train: float = 1.0,
    eps_train_final: float = 0.05,
    buffer_size: int = 100000,
    lr: float = 0.0001,
    gamma: float = 0.99,
    sample_size: int = 32,
    online_sample_size: int = 8,
    target_sample_size: int = 8,
    num_cosines: int = 64,
    hidden_sizes: Sequence[int] = (512,),
    n_step: int = 3,
    target_update_freq: int = 500,
    epoch: int = 100,
    step_per_epoch: int = 100000,
    step_per_collect: int = 10,
    update_per_step: float = 0.1,
    batch_size: int = 32,
    training_num: int = 10,
    test_num: int = 10,
    frames_stack: int = 4,
    save_buffer_name: str | None = None,  # TODO support?
) -> None:
    """Assemble and run an IQN experiment on an Atari task via the high-level API.

    The run is named ``<task>/iqn/<seed>/<datetime tag>``. ``save_buffer_name``
    is accepted but not used by this function.
    """
    run_name = os.path.join(task, "iqn", str(experiment_config.seed), datetime_tag())

    sampling = SamplingConfig(
        num_epochs=epoch,
        step_per_epoch=step_per_epoch,
        batch_size=batch_size,
        num_train_envs=training_num,
        num_test_envs=test_num,
        buffer_size=buffer_size,
        step_per_collect=step_per_collect,
        update_per_step=update_per_step,
        repeat_per_collect=None,
        replay_buffer_stack_num=frames_stack,
        replay_buffer_ignore_obs_next=True,
        replay_buffer_save_only_last_obs=True,
    )

    atari_envs = AtariEnvFactory(
        task,
        sampling.train_seed,
        sampling.test_seed,
        frames_stack,
        scale=scale_obs,
    )

    # Configure the builder one step at a time, in the same order as a chained call.
    builder = IQNExperimentBuilder(atari_envs, experiment_config, sampling)
    builder = builder.with_iqn_params(
        IQNParams(
            discount_factor=gamma,
            estimation_step=n_step,
            lr=lr,
            sample_size=sample_size,
            online_sample_size=online_sample_size,
            target_update_freq=target_update_freq,
            target_sample_size=target_sample_size,
            hidden_sizes=hidden_sizes,
            num_cosines=num_cosines,
        ),
    )
    builder = builder.with_preprocess_network_factory(
        IntermediateModuleFactoryAtariDQN(features_only=True),
    )
    builder = builder.with_epoch_train_callback(
        EpochTrainCallbackDQNEpsLinearDecay(eps_train, eps_train_final),
    )
    builder = builder.with_epoch_test_callback(EpochTestCallbackDQNSetEps(eps_test))
    builder = builder.with_epoch_stop_callback(AtariEpochStopCallback(task))

    experiment = builder.build()
    experiment.run(run_name=run_name)
100
+
101
+
102
# Script entry point: hand `main` to tianshou's `logging.run_cli`.
if __name__ == "__main__":
    logging.run_cli(main)
examples/atari/atari_network.py ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections.abc import Callable, Sequence
2
+ from typing import Any
3
+
4
+ import numpy as np
5
+ import torch
6
+ from torch import nn
7
+
8
+ from examples.atari.tianshou.highlevel.env import Environments
9
+ from examples.atari.tianshou.highlevel.module.actor import ActorFactory
10
+ from examples.atari.tianshou.highlevel.module.core import (
11
+ TDevice,
12
+ )
13
+ from examples.atari.tianshou.highlevel.module.intermediate import (
14
+ IntermediateModule,
15
+ IntermediateModuleFactory,
16
+ )
17
+ from examples.atari.tianshou.utils.net.common import NetBase
18
+ from examples.atari.tianshou.utils.net.discrete import Actor, NoisyLinear
19
+
20
+
21
def layer_init(layer: nn.Module, std: float = np.sqrt(2), bias_const: float = 0.0) -> nn.Module:
    """Initialise the parameters of ``layer`` in place and return the same object.

    :param layer: a module exposing ``weight`` and ``bias`` tensors.
    :param std: gain passed to the orthogonal initialisation of the weight.
    :param bias_const: constant every bias entry is set to.
    :return: ``layer`` itself, so the call can be nested inside a container definition.
    """
    nn.init.orthogonal_(layer.weight, gain=std)
    nn.init.constant_(layer.bias, val=bias_const)
    return layer
25
+
26
+
27
class ScaledObsInputModule(torch.nn.Module):
    """Wrapper that divides observations by a constant before calling the wrapped network."""

    def __init__(self, module: NetBase, denom: float = 255.0) -> None:
        """Store the wrapped network and the divisor.

        :param module: the network that receives the scaled observations.
        :param denom: the value observations are divided by.
        """
        super().__init__()
        self.module = module
        self.denom = denom
        # Mirror the wrapped network's output size so that downstream modules
        # can retrieve it from the wrapper (see usages of get_output_dim).
        self.output_dim = module.output_dim

    def forward(
        self,
        obs: np.ndarray | torch.Tensor,
        state: Any | None = None,
        info: dict[str, Any] | None = None,
    ) -> tuple[torch.Tensor, Any]:
        """Divide ``obs`` by ``denom`` and forward it, with ``state`` and ``info``, to the wrapped network.

        A missing ``info`` is replaced by an empty dict.
        """
        scaled_obs = obs / self.denom
        extra = {} if info is None else info
        return self.module.forward(scaled_obs, state, extra)
44
+
45
+
46
def scale_obs(module: NetBase, denom: float = 255.0) -> ScaledObsInputModule:
    """Wrap ``module`` in a :class:`ScaledObsInputModule` that divides observations by ``denom``."""
    wrapped = ScaledObsInputModule(module, denom=denom)
    return wrapped
48
+
49
+
50
class DQN(NetBase[Any]):
    """Reference: Human-level control through deep reinforcement learning.

    For advanced usage (how to customize the network), please refer to
    :ref:`build_the_network`.
    """

    def __init__(
        self,
        c: int,
        h: int,
        w: int,
        action_shape: Sequence[int] | int,
        device: str | int | torch.device = "cpu",
        features_only: bool = False,
        output_dim_added_layer: int | None = None,
        layer_init: Callable[[nn.Module], nn.Module] = lambda x: x,
    ) -> None:
        """Build the convolutional trunk and, depending on the flags, a head on top of it.

        :param c: number of channels of the observation.
        :param h: height of the observation.
        :param w: width of the observation.
        :param action_shape: shape of the action space; its product is the size of
            the final linear layer when `features_only` is false.
        :param device: device onto which `forward` moves incoming observations.
        :param features_only: if false, a 512-unit hidden layer followed by a linear
            layer with one output per action is appended to the trunk; if true, the
            network ends with the flattened convolutional features (or with the
            extra layer described below).
        :param output_dim_added_layer: only allowed together with `features_only`;
            when given, a linear layer of this size followed by a ReLU is appended
            to the trunk.
        :param layer_init: function applied to every convolutional and linear layer
            created here; the default leaves the layers untouched.
        :raises ValueError: if `output_dim_added_layer` is given while
            `features_only` is false.
        """
        if not features_only and output_dim_added_layer is not None:
            raise ValueError(
                "Should not provide explicit output dimension using `output_dim_added_layer` when `features_only` is false.",
            )
        super().__init__()
        self.device = device
        self.net = nn.Sequential(
            layer_init(nn.Conv2d(c, 32, kernel_size=8, stride=4)),
            nn.ReLU(inplace=True),
            layer_init(nn.Conv2d(32, 64, kernel_size=4, stride=2)),
            nn.ReLU(inplace=True),
            layer_init(nn.Conv2d(64, 64, kernel_size=3, stride=1)),
            nn.ReLU(inplace=True),
            nn.Flatten(),
        )
        # Probe the trunk with a dummy observation to find the flattened feature size.
        with torch.no_grad():
            base_cnn_output_dim = int(np.prod(self.net(torch.zeros(1, c, h, w)).shape[1:]))
        if not features_only:
            action_dim = int(np.prod(action_shape))
            self.net = nn.Sequential(
                self.net,
                layer_init(nn.Linear(base_cnn_output_dim, 512)),
                nn.ReLU(inplace=True),
                layer_init(nn.Linear(512, action_dim)),
            )
            self.output_dim = action_dim
        elif output_dim_added_layer is not None:
            self.net = nn.Sequential(
                self.net,
                layer_init(nn.Linear(base_cnn_output_dim, output_dim_added_layer)),
                nn.ReLU(inplace=True),
            )
            self.output_dim = output_dim_added_layer
        else:
            self.output_dim = base_cnn_output_dim

    def forward(
        self,
        obs: np.ndarray | torch.Tensor,
        state: Any | None = None,
        info: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> tuple[torch.Tensor, Any]:
        r"""Mapping: s -> Q(s, \*)."""
        # Observations are converted to float32 tensors on the configured device;
        # `state` is returned unchanged, `info` and `kwargs` are not used.
        obs = torch.as_tensor(obs, device=self.device, dtype=torch.float32)
        return self.net(obs), state
115
+
116
+
117
class C51(DQN):
    """Reference: A distributional perspective on reinforcement learning.

    For advanced usage (how to customize the network), please refer to
    :ref:`build_the_network`.
    """

    def __init__(
        self,
        c: int,
        h: int,
        w: int,
        action_shape: Sequence[int],
        num_atoms: int = 51,
        device: str | int | torch.device = "cpu",
    ) -> None:
        """Build a DQN whose output holds ``num_atoms`` values for every action.

        :param c: number of channels of the observation.
        :param h: height of the observation.
        :param w: width of the observation.
        :param action_shape: shape of the action space; its product is the number of actions.
        :param num_atoms: number of atoms per action.
        :param device: device passed on to the base class.
        """
        n_actions = int(np.prod(action_shape))
        self.action_num = n_actions
        super().__init__(c, h, w, [n_actions * num_atoms], device)
        self.num_atoms = num_atoms

    def forward(
        self,
        obs: np.ndarray | torch.Tensor,
        state: Any | None = None,
        info: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> tuple[torch.Tensor, Any]:
        r"""Mapping: x -> Z(x, \*)."""
        # Only `obs` is handed to the base forward; the state returned here is
        # the one the base forward yields for that call.
        logits, state = super().forward(obs)
        # Softmax over the atoms of each action, then regroup per sample.
        atom_probs = logits.view(-1, self.num_atoms).softmax(dim=-1)
        return atom_probs.view(-1, self.action_num, self.num_atoms), state
149
+
150
+
151
class Rainbow(DQN):
    """Reference: Rainbow: Combining Improvements in Deep Reinforcement Learning.

    For advanced usage (how to customize the network), please refer to
    :ref:`build_the_network`.
    """

    def __init__(
        self,
        c: int,
        h: int,
        w: int,
        action_shape: Sequence[int],
        num_atoms: int = 51,
        noisy_std: float = 0.5,
        device: str | int | torch.device = "cpu",
        is_dueling: bool = True,
        is_noisy: bool = True,
    ) -> None:
        """Build the feature trunk plus a Q head and, if dueling, a V head.

        :param c: number of channels of the observation.
        :param h: height of the observation.
        :param w: width of the observation.
        :param action_shape: shape of the action space; its product is the number of actions.
        :param num_atoms: number of atoms per action.
        :param noisy_std: third argument given to ``NoisyLinear`` when `is_noisy` is true.
        :param device: device passed on to the base class.
        :param is_dueling: whether a separate V head is created and combined with the Q head.
        :param is_noisy: whether the heads use ``NoisyLinear`` instead of ``nn.Linear``.
        """
        super().__init__(c, h, w, action_shape, device, features_only=True)
        self.action_num = int(np.prod(action_shape))
        self.num_atoms = num_atoms

        def linear(x: int, y: int) -> NoisyLinear | nn.Linear:
            return NoisyLinear(x, y, noisy_std) if is_noisy else nn.Linear(x, y)

        def make_head(out_features: int) -> nn.Sequential:
            # At this point `self.output_dim` is still the size of the trunk's features.
            return nn.Sequential(
                linear(self.output_dim, 512),
                nn.ReLU(inplace=True),
                linear(512, out_features),
            )

        self.Q = make_head(self.action_num * self.num_atoms)
        self._is_dueling = is_dueling
        if is_dueling:
            self.V = make_head(self.num_atoms)
        self.output_dim = self.action_num * self.num_atoms

    def forward(
        self,
        obs: np.ndarray | torch.Tensor,
        state: Any | None = None,
        info: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> tuple[torch.Tensor, Any]:
        r"""Mapping: x -> Z(x, \*)."""
        features, state = super().forward(obs)
        q = self.Q(features).view(-1, self.action_num, self.num_atoms)
        if not self._is_dueling:
            logits = q
        else:
            v = self.V(features).view(-1, 1, self.num_atoms)
            # Centre the Q output over the action axis before adding the V output.
            logits = q - q.mean(dim=1, keepdim=True) + v
        return logits.softmax(dim=2), state
212
+
213
+
214
class QRDQN(DQN):
    """Reference: Distributional Reinforcement Learning with Quantile Regression.

    For advanced usage (how to customize the network), please refer to
    :ref:`build_the_network`.
    """

    def __init__(
        self,
        *,
        c: int,
        h: int,
        w: int,
        action_shape: Sequence[int] | int,
        num_quantiles: int = 200,
        device: str | int | torch.device = "cpu",
    ) -> None:
        """Build a DQN whose output holds ``num_quantiles`` values for every action.

        All arguments are keyword-only.

        :param c: number of channels of the observation.
        :param h: height of the observation.
        :param w: width of the observation.
        :param action_shape: shape of the action space; its product is the number of actions.
        :param num_quantiles: number of quantiles per action.
        :param device: device passed on to the base class.
        """
        n_actions = int(np.prod(action_shape))
        self.action_num = n_actions
        super().__init__(c, h, w, [n_actions * num_quantiles], device)
        self.num_quantiles = num_quantiles

    def forward(
        self,
        obs: np.ndarray | torch.Tensor,
        state: Any | None = None,
        info: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> tuple[torch.Tensor, Any]:
        r"""Mapping: x -> Z(x, \*)."""
        # Only `obs` is handed to the base forward.
        flat, state = super().forward(obs)
        quantiles = flat.view(-1, self.action_num, self.num_quantiles)
        return quantiles, state
246
+
247
+
248
class ActorFactoryAtariDQN(ActorFactory):
    """Actor factory that wraps a :class:`DQN` network, optionally with scaled observations, in an ``Actor``."""

    def __init__(
        self,
        scale_obs: bool = True,
        features_only: bool = False,
        output_dim_added_layer: int | None = None,
    ) -> None:
        """Remember the options used later by :meth:`create_module`.

        :param scale_obs: whether the network is wrapped with the module-level ``scale_obs``.
        :param features_only: forwarded to :class:`DQN`.
        :param output_dim_added_layer: forwarded to :class:`DQN`.
        """
        self.scale_obs = scale_obs
        self.features_only = features_only
        self.output_dim_added_layer = output_dim_added_layer

    def create_module(self, envs: Environments, device: TDevice) -> Actor:
        """Create the actor for the given environments and move it to ``device``."""
        c, h, w = envs.get_observation_shape()  # type: ignore # only right shape is a sequence of length 3
        action_shape = envs.get_action_shape()
        if isinstance(action_shape, np.int64):
            action_shape = int(action_shape)
        backbone = DQN(
            c=c,
            h=h,
            w=w,
            action_shape=action_shape,
            device=device,
            features_only=self.features_only,
            output_dim_added_layer=self.output_dim_added_layer,
            layer_init=layer_init,
        )
        # `scale_obs` below is the module-level helper, not the boolean attribute.
        net: DQN | ScaledObsInputModule = scale_obs(backbone) if self.scale_obs else backbone
        actor = Actor(net, envs.get_action_shape(), device=device, softmax_output=False)
        return actor.to(device)
278
+
279
+
280
class IntermediateModuleFactoryAtariDQN(IntermediateModuleFactory):
    """Factory producing an ``IntermediateModule`` backed by a :class:`DQN` network."""

    def __init__(self, features_only: bool = False, net_only: bool = False) -> None:
        """Remember the options used later by :meth:`create_intermediate_module`.

        :param features_only: forwarded to :class:`DQN`.
        :param net_only: if true, the DQN's inner ``net`` is exposed instead of the DQN itself.
        """
        self.net_only = net_only
        self.features_only = features_only

    def create_intermediate_module(self, envs: Environments, device: TDevice) -> IntermediateModule:
        """Create the DQN on ``device`` and wrap it (or its ``net``) together with its output dimension."""
        raw_shape = envs.get_observation_shape()
        obs_shape = [raw_shape] if isinstance(raw_shape, int) else raw_shape
        assert len(obs_shape) == 3
        c, h, w = obs_shape
        action_shape = envs.get_action_shape()
        if isinstance(action_shape, np.int64):
            action_shape = int(action_shape)
        dqn = DQN(
            c=c,
            h=h,
            w=w,
            action_shape=action_shape,
            device=device,
            features_only=self.features_only,
        ).to(device)
        if self.net_only:
            return IntermediateModule(dqn.net, dqn.output_dim)
        return IntermediateModule(dqn, dqn.output_dim)
304
+
305
+
306
class IntermediateModuleFactoryAtariDQNFeatures(IntermediateModuleFactoryAtariDQN):
    """Preset of :class:`IntermediateModuleFactoryAtariDQN` with ``features_only`` and ``net_only`` both enabled."""

    def __init__(self) -> None:
        super().__init__(net_only=True, features_only=True)
examples/atari/atari_ppo.py ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import datetime
3
+ import os
4
+ import pprint
5
+ import sys
6
+
7
+ import numpy as np
8
+ import torch
9
+ from atari_network import DQN, layer_init, scale_obs
10
+ from atari_wrapper import make_atari_env
11
+ from torch.distributions import Categorical
12
+ from torch.optim.lr_scheduler import LambdaLR
13
+
14
+ from tianshou.data import Collector, VectorReplayBuffer
15
+ from tianshou.highlevel.logger import LoggerFactoryDefault
16
+ from tianshou.policy import ICMPolicy, PPOPolicy
17
+ from tianshou.policy.base import BasePolicy
18
+ from tianshou.trainer import OnpolicyTrainer
19
+ from tianshou.utils.net.common import ActorCritic
20
+ from tianshou.utils.net.discrete import Actor, Critic, IntrinsicCuriosityModule
21
+
22
+
23
def get_args() -> argparse.Namespace:
    """Parse the command-line options of the Atari PPO example.

    :return: the namespace produced by ``ArgumentParser.parse_args``.
    """
    default_device = "cuda" if torch.cuda.is_available() else "cpu"
    # (flag, add_argument keyword arguments) pairs, registered in this order.
    option_specs: list[tuple[str, dict]] = [
        ("--task", {"type": str, "default": "PongNoFrameskip-v4"}),
        ("--seed", {"type": int, "default": 4213}),
        ("--scale-obs", {"type": int, "default": 1}),
        ("--buffer-size", {"type": int, "default": 100000}),
        ("--lr", {"type": float, "default": 2.5e-4}),
        ("--gamma", {"type": float, "default": 0.99}),
        ("--epoch", {"type": int, "default": 100}),
        ("--step-per-epoch", {"type": int, "default": 100000}),
        ("--step-per-collect", {"type": int, "default": 1000}),
        ("--repeat-per-collect", {"type": int, "default": 4}),
        ("--batch-size", {"type": int, "default": 256}),
        ("--hidden-size", {"type": int, "default": 512}),
        ("--training-num", {"type": int, "default": 10}),
        ("--test-num", {"type": int, "default": 10}),
        ("--rew-norm", {"type": int, "default": False}),
        ("--vf-coef", {"type": float, "default": 0.25}),
        ("--ent-coef", {"type": float, "default": 0.01}),
        ("--gae-lambda", {"type": float, "default": 0.95}),
        ("--lr-decay", {"type": int, "default": True}),
        ("--max-grad-norm", {"type": float, "default": 0.5}),
        ("--eps-clip", {"type": float, "default": 0.1}),
        ("--dual-clip", {"type": float, "default": None}),
        ("--value-clip", {"type": int, "default": 1}),
        ("--norm-adv", {"type": int, "default": 1}),
        ("--recompute-adv", {"type": int, "default": 0}),
        ("--logdir", {"type": str, "default": "log"}),
        ("--render", {"type": float, "default": 0.0}),
        ("--device", {"type": str, "default": default_device}),
        ("--frames-stack", {"type": int, "default": 4}),
        ("--resume-path", {"type": str, "default": None}),
        ("--resume-id", {"type": str, "default": None}),
        (
            "--logger",
            {"type": str, "default": "tensorboard", "choices": ["tensorboard", "wandb"]},
        ),
        ("--wandb-project", {"type": str, "default": "atari.benchmark"}),
        (
            "--watch",
            {
                "default": False,
                "action": "store_true",
                "help": "watch the play of pre-trained policy only",
            },
        ),
        ("--save-buffer-name", {"type": str, "default": None}),
        (
            "--icm-lr-scale",
            {
                "type": float,
                "default": 0.0,
                "help": "use intrinsic curiosity module with this lr scale",
            },
        ),
        (
            "--icm-reward-scale",
            {
                "type": float,
                "default": 0.01,
                "help": "scaling factor for intrinsic curiosity reward",
            },
        ),
        (
            "--icm-forward-loss-weight",
            {
                "type": float,
                "default": 0.2,
                "help": "weight for the forward model loss in ICM",
            },
        ),
    ]
    parser = argparse.ArgumentParser()
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser.parse_args()
93
+
94
+
95
def test_ppo(args: argparse.Namespace | None = None) -> None:
    """Train PPO (optionally wrapped with ICM) on an Atari task, or only watch a policy.

    :param args: the parsed options; when omitted, the command line is parsed
        with :func:`get_args` at call time. (A default of ``get_args()`` in the
        signature would be evaluated once, when the function is defined, and
        would therefore parse ``sys.argv`` on import.)
    """
    if args is None:
        args = get_args()
    # NOTE(review): `scale=0` is passed regardless of `--scale-obs`; scaling is
    # instead applied by wrapping the network with `scale_obs` below. Presumably
    # 0 disables env-side scaling -- confirm against `make_atari_env`.
    env, train_envs, test_envs = make_atari_env(
        args.task,
        args.seed,
        args.training_num,
        args.test_num,
        scale=0,
        frame_stack=args.frames_stack,
    )
    args.state_shape = env.observation_space.shape or env.observation_space.n
    args.action_shape = env.action_space.shape or env.action_space.n
    # should be N_FRAMES x H x W
    print("Observations shape:", args.state_shape)
    print("Actions shape:", args.action_shape)
    # seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # define model
    net = DQN(
        *args.state_shape,
        args.action_shape,
        device=args.device,
        features_only=True,
        output_dim_added_layer=args.hidden_size,
        layer_init=layer_init,
    )
    if args.scale_obs:
        net = scale_obs(net)
    # actor and critic share the same feature network
    actor = Actor(net, args.action_shape, device=args.device, softmax_output=False)
    critic = Critic(net, device=args.device)
    optim = torch.optim.Adam(ActorCritic(actor, critic).parameters(), lr=args.lr, eps=1e-5)

    lr_scheduler = None
    if args.lr_decay:
        # decay learning rate to 0 linearly
        max_update_num = np.ceil(args.step_per_epoch / args.step_per_collect) * args.epoch

        lr_scheduler = LambdaLR(optim, lr_lambda=lambda epoch: 1 - epoch / max_update_num)

    policy: PPOPolicy = PPOPolicy(
        actor=actor,
        critic=critic,
        optim=optim,
        dist_fn=Categorical,
        discount_factor=args.gamma,
        gae_lambda=args.gae_lambda,
        max_grad_norm=args.max_grad_norm,
        vf_coef=args.vf_coef,
        ent_coef=args.ent_coef,
        reward_normalization=args.rew_norm,
        action_scaling=False,
        lr_scheduler=lr_scheduler,
        action_space=env.action_space,
        eps_clip=args.eps_clip,
        value_clip=args.value_clip,
        dual_clip=args.dual_clip,
        advantage_normalization=args.norm_adv,
        recompute_advantage=args.recompute_adv,
    ).to(args.device)
    if args.icm_lr_scale > 0:
        # wrap the PPO policy with an intrinsic curiosity module that has its
        # own feature network and optimizer
        feature_net = DQN(*args.state_shape, args.action_shape, args.device, features_only=True)
        action_dim = np.prod(args.action_shape)
        feature_dim = feature_net.output_dim
        icm_net = IntrinsicCuriosityModule(
            feature_net.net,
            feature_dim,
            action_dim,
            hidden_sizes=[args.hidden_size],
            device=args.device,
        )
        icm_optim = torch.optim.Adam(icm_net.parameters(), lr=args.lr)
        policy: ICMPolicy = ICMPolicy(  # type: ignore[no-redef]
            policy=policy,
            model=icm_net,
            optim=icm_optim,
            action_space=env.action_space,
            lr_scale=args.icm_lr_scale,
            reward_scale=args.icm_reward_scale,
            forward_loss_weight=args.icm_forward_loss_weight,
        ).to(args.device)
    # load a previous policy
    if args.resume_path:
        policy.load_state_dict(torch.load(args.resume_path, map_location=args.device))
        print("Loaded agent from: ", args.resume_path)
    # replay buffer: `save_last_obs` and `stack_num` can be removed together
    # when you have enough RAM
    buffer = VectorReplayBuffer(
        args.buffer_size,
        buffer_num=len(train_envs),
        ignore_obs_next=True,
        save_only_last_obs=True,
        stack_num=args.frames_stack,
    )
    # collector
    train_collector = Collector(policy, train_envs, buffer, exploration_noise=True)
    test_collector = Collector(policy, test_envs, exploration_noise=True)

    # log
    now = datetime.datetime.now().strftime("%y%m%d-%H%M%S")
    args.algo_name = "ppo_icm" if args.icm_lr_scale > 0 else "ppo"
    log_name = os.path.join(args.task, args.algo_name, str(args.seed), now)
    log_path = os.path.join(args.logdir, log_name)

    # logger
    logger_factory = LoggerFactoryDefault()
    if args.logger == "wandb":
        logger_factory.logger_type = "wandb"
        logger_factory.wandb_project = args.wandb_project
    else:
        logger_factory.logger_type = "tensorboard"

    logger = logger_factory.create_logger(
        log_dir=log_path,
        experiment_name=log_name,
        run_id=args.resume_id,
        config_dict=vars(args),
    )

    def save_best_fn(policy: BasePolicy) -> None:
        torch.save(policy.state_dict(), os.path.join(log_path, "policy.pth"))

    def stop_fn(mean_rewards: float) -> bool:
        if env.spec.reward_threshold:
            return mean_rewards >= env.spec.reward_threshold
        if "Pong" in args.task:
            return mean_rewards >= 20
        return False

    def save_checkpoint_fn(epoch: int, env_step: int, gradient_step: int) -> str:
        # see also: https://pytorch.org/tutorials/beginner/saving_loading_models.html
        ckpt_path = os.path.join(log_path, f"checkpoint_{epoch}.pth")
        torch.save({"model": policy.state_dict()}, ckpt_path)
        return ckpt_path

    # watch agent's performance
    def watch() -> None:
        print("Setup test envs ...")
        test_envs.seed(args.seed)
        if args.save_buffer_name:
            print(f"Generate buffer with size {args.buffer_size}")
            buffer = VectorReplayBuffer(
                args.buffer_size,
                buffer_num=len(test_envs),
                ignore_obs_next=True,
                save_only_last_obs=True,
                stack_num=args.frames_stack,
            )
            collector = Collector(policy, test_envs, buffer, exploration_noise=True)
            result = collector.collect(n_step=args.buffer_size)
            print(f"Save buffer into {args.save_buffer_name}")
            # Unfortunately, pickle will cause oom with 1M buffer size
            buffer.save_hdf5(args.save_buffer_name)
        else:
            print("Testing agent ...")
            test_collector.reset()
            result = test_collector.collect(n_episode=args.test_num, render=args.render)
        result.pprint_asdict()

    if args.watch:
        watch()
        sys.exit(0)

    # test train_collector and start filling replay buffer
    train_collector.reset()
    train_collector.collect(n_step=args.batch_size * args.training_num)
    # trainer
    result = OnpolicyTrainer(
        policy=policy,
        train_collector=train_collector,
        test_collector=test_collector,
        max_epoch=args.epoch,
        step_per_epoch=args.step_per_epoch,
        repeat_per_collect=args.repeat_per_collect,
        episode_per_test=args.test_num,
        batch_size=args.batch_size,
        step_per_collect=args.step_per_collect,
        stop_fn=stop_fn,
        save_best_fn=save_best_fn,
        logger=logger,
        test_in_train=False,
        resume_from_log=args.resume_id is not None,
        save_checkpoint_fn=save_checkpoint_fn,
    ).run()

    pprint.pprint(result)
    watch()
281
+
282
+
283
# Script entry point: parse the command line and run the PPO example when executed directly.
if __name__ == "__main__":
    test_ppo(get_args())
examples/atari/atari_ppo_hl.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import os
4
+ from collections.abc import Sequence
5
+
6
+ from examples.atari.atari_network import (
7
+ ActorFactoryAtariDQN,
8
+ IntermediateModuleFactoryAtariDQNFeatures,
9
+ )
10
+ from examples.atari.atari_wrapper import AtariEnvFactory, AtariEpochStopCallback
11
+ from tianshou.highlevel.config import SamplingConfig
12
+ from tianshou.highlevel.experiment import (
13
+ ExperimentConfig,
14
+ PPOExperimentBuilder,
15
+ )
16
+ from tianshou.highlevel.params.lr_scheduler import LRSchedulerFactoryLinear
17
+ from tianshou.highlevel.params.policy_params import PPOParams
18
+ from tianshou.highlevel.params.policy_wrapper import (
19
+ PolicyWrapperFactoryIntrinsicCuriosity,
20
+ )
21
+ from tianshou.utils import logging
22
+ from tianshou.utils.logging import datetime_tag
23
+
24
+
25
def main(
    experiment_config: ExperimentConfig,
    task: str = "PongNoFrameskip-v4",
    scale_obs: bool = True,
    buffer_size: int = 100000,
    lr: float = 2.5e-4,
    gamma: float = 0.99,
    epoch: int = 100,
    step_per_epoch: int = 100000,
    step_per_collect: int = 1000,
    repeat_per_collect: int = 4,
    batch_size: int = 256,
    hidden_sizes: Sequence[int] = (512,),
    training_num: int = 10,
    test_num: int = 10,
    rew_norm: bool = False,
    vf_coef: float = 0.25,
    ent_coef: float = 0.01,
    gae_lambda: float = 0.95,
    lr_decay: bool = True,
    max_grad_norm: float = 0.5,
    eps_clip: float = 0.1,
    dual_clip: float | None = None,
    value_clip: bool = True,
    norm_adv: bool = True,
    recompute_adv: bool = False,
    frames_stack: int = 4,
    save_buffer_name: str | None = None,  # TODO add support in high-level API?
    icm_lr_scale: float = 0.0,
    icm_reward_scale: float = 0.01,
    icm_forward_loss_weight: float = 0.2,
) -> None:
    """Assemble and run a PPO experiment on an Atari task through the high-level API.

    The sampling, environment and PPO settings are built from the arguments; an
    intrinsic-curiosity policy wrapper is added when ``icm_lr_scale`` is positive.
    ``save_buffer_name`` is accepted but not used, and ``hidden_sizes`` is only
    used for the intrinsic-curiosity wrapper.
    """
    run_name = os.path.join(task, "ppo", str(experiment_config.seed), datetime_tag())

    sampling_config = SamplingConfig(
        num_epochs=epoch,
        step_per_epoch=step_per_epoch,
        batch_size=batch_size,
        num_train_envs=training_num,
        num_test_envs=test_num,
        buffer_size=buffer_size,
        step_per_collect=step_per_collect,
        repeat_per_collect=repeat_per_collect,
        replay_buffer_stack_num=frames_stack,
        replay_buffer_ignore_obs_next=True,
        replay_buffer_save_only_last_obs=True,
    )

    # NOTE(review): `scale_obs` is given both to the env factory here and to the
    # actor factory below -- verify that observations are not scaled twice.
    env_factory = AtariEnvFactory(
        task,
        sampling_config.train_seed,
        sampling_config.test_seed,
        frames_stack,
        scale=scale_obs,
    )

    builder = PPOExperimentBuilder(env_factory, experiment_config, sampling_config)

    # Linear learning-rate schedule only when decay is requested.
    if lr_decay:
        lr_scheduler_factory = LRSchedulerFactoryLinear(sampling_config)
    else:
        lr_scheduler_factory = None
    ppo_params = PPOParams(
        discount_factor=gamma,
        gae_lambda=gae_lambda,
        reward_normalization=rew_norm,
        ent_coef=ent_coef,
        vf_coef=vf_coef,
        max_grad_norm=max_grad_norm,
        value_clip=value_clip,
        advantage_normalization=norm_adv,
        eps_clip=eps_clip,
        dual_clip=dual_clip,
        recompute_advantage=recompute_adv,
        lr=lr,
        lr_scheduler_factory=lr_scheduler_factory,
    )
    builder = builder.with_ppo_params(ppo_params)
    builder = builder.with_actor_factory(
        ActorFactoryAtariDQN(scale_obs=scale_obs, features_only=True),
    )
    builder = builder.with_critic_factory_use_actor()
    builder = builder.with_epoch_stop_callback(AtariEpochStopCallback(task))

    if icm_lr_scale > 0:
        icm_wrapper_factory = PolicyWrapperFactoryIntrinsicCuriosity(
            feature_net_factory=IntermediateModuleFactoryAtariDQNFeatures(),
            hidden_sizes=hidden_sizes,
            lr=lr,
            lr_scale=icm_lr_scale,
            reward_scale=icm_reward_scale,
            forward_loss_weight=icm_forward_loss_weight,
        )
        # The return value is deliberately not rebound; `builder` keeps referring
        # to the same object as before this call.
        builder.with_policy_wrapper_factory(icm_wrapper_factory)

    builder.build().run(run_name=run_name)
119
+
120
+
121
# Script entry point: when the file is executed directly, hand `main` to `logging.run_cli`.
if __name__ == "__main__":
    logging.run_cli(main)
examples/atari/atari_qrdqn.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import datetime
3
+ import os
4
+ import pprint
5
+ import sys
6
+
7
+ import numpy as np
8
+ import torch
9
+ from atari_network import QRDQN
10
+ from atari_wrapper import make_atari_env
11
+
12
+ from tianshou.data import Collector, VectorReplayBuffer
13
+ from tianshou.highlevel.logger import LoggerFactoryDefault
14
+ from tianshou.policy import QRDQNPolicy
15
+ from tianshou.policy.base import BasePolicy
16
+ from tianshou.trainer import OffpolicyTrainer
17
+
18
+
19
def get_args() -> argparse.Namespace:
    """Parse the command-line options of the Atari QRDQN example.

    :return: the namespace produced by ``ArgumentParser.parse_args``.
    """
    default_device = "cuda" if torch.cuda.is_available() else "cpu"
    # (flag, add_argument keyword arguments) pairs, registered in this order.
    option_specs: list[tuple[str, dict]] = [
        ("--task", {"type": str, "default": "PongNoFrameskip-v4"}),
        ("--seed", {"type": int, "default": 0}),
        ("--scale-obs", {"type": int, "default": 0}),
        ("--eps-test", {"type": float, "default": 0.005}),
        ("--eps-train", {"type": float, "default": 1.0}),
        ("--eps-train-final", {"type": float, "default": 0.05}),
        ("--buffer-size", {"type": int, "default": 100000}),
        ("--lr", {"type": float, "default": 0.0001}),
        ("--gamma", {"type": float, "default": 0.99}),
        ("--num-quantiles", {"type": int, "default": 200}),
        ("--n-step", {"type": int, "default": 3}),
        ("--target-update-freq", {"type": int, "default": 500}),
        ("--epoch", {"type": int, "default": 100}),
        ("--step-per-epoch", {"type": int, "default": 100000}),
        ("--step-per-collect", {"type": int, "default": 10}),
        ("--update-per-step", {"type": float, "default": 0.1}),
        ("--batch-size", {"type": int, "default": 32}),
        ("--training-num", {"type": int, "default": 10}),
        ("--test-num", {"type": int, "default": 10}),
        ("--logdir", {"type": str, "default": "log"}),
        ("--render", {"type": float, "default": 0.0}),
        ("--device", {"type": str, "default": default_device}),
        ("--frames-stack", {"type": int, "default": 4}),
        ("--resume-path", {"type": str, "default": None}),
        ("--resume-id", {"type": str, "default": None}),
        (
            "--logger",
            {"type": str, "default": "tensorboard", "choices": ["tensorboard", "wandb"]},
        ),
        ("--wandb-project", {"type": str, "default": "atari.benchmark"}),
        (
            "--watch",
            {
                "default": False,
                "action": "store_true",
                "help": "watch the play of pre-trained policy only",
            },
        ),
        ("--save-buffer-name", {"type": str, "default": None}),
    ]
    parser = argparse.ArgumentParser()
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser.parse_args()
65
+
66
+
67
def test_qrdqn(args: argparse.Namespace | None = None) -> None:
    """Train QRDQN on an Atari task, or only watch a policy.

    :param args: the parsed options; when omitted, the command line is parsed
        with :func:`get_args` at call time. (A default of ``get_args()`` in the
        signature would be evaluated once, when the function is defined, and
        would therefore parse ``sys.argv`` on import.)
    """
    if args is None:
        args = get_args()
    env, train_envs, test_envs = make_atari_env(
        args.task,
        args.seed,
        args.training_num,
        args.test_num,
        scale=args.scale_obs,
        frame_stack=args.frames_stack,
    )
    args.state_shape = env.observation_space.shape or env.observation_space.n
    args.action_shape = env.action_space.shape or env.action_space.n
    # should be N_FRAMES x H x W
    print("Observations shape:", args.state_shape)
    print("Actions shape:", args.action_shape)
    # seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # define model
    c, h, w = args.state_shape
    net = QRDQN(
        c=c,
        h=h,
        w=w,
        action_shape=args.action_shape,
        num_quantiles=args.num_quantiles,
        device=args.device,
    )
    optim = torch.optim.Adam(net.parameters(), lr=args.lr)
    # define policy
    policy: QRDQNPolicy = QRDQNPolicy(
        model=net,
        optim=optim,
        action_space=env.action_space,
        discount_factor=args.gamma,
        num_quantiles=args.num_quantiles,
        estimation_step=args.n_step,
        target_update_freq=args.target_update_freq,
    ).to(args.device)
    # load a previous policy
    if args.resume_path:
        policy.load_state_dict(torch.load(args.resume_path, map_location=args.device))
        print("Loaded agent from: ", args.resume_path)
    # replay buffer: `save_last_obs` and `stack_num` can be removed together
    # when you have enough RAM
    buffer = VectorReplayBuffer(
        args.buffer_size,
        buffer_num=len(train_envs),
        ignore_obs_next=True,
        save_only_last_obs=True,
        stack_num=args.frames_stack,
    )
    # collector
    train_collector = Collector(policy, train_envs, buffer, exploration_noise=True)
    test_collector = Collector(policy, test_envs, exploration_noise=True)

    # log
    now = datetime.datetime.now().strftime("%y%m%d-%H%M%S")
    args.algo_name = "qrdqn"
    log_name = os.path.join(args.task, args.algo_name, str(args.seed), now)
    log_path = os.path.join(args.logdir, log_name)

    # logger
    logger_factory = LoggerFactoryDefault()
    if args.logger == "wandb":
        logger_factory.logger_type = "wandb"
        logger_factory.wandb_project = args.wandb_project
    else:
        logger_factory.logger_type = "tensorboard"

    logger = logger_factory.create_logger(
        log_dir=log_path,
        experiment_name=log_name,
        run_id=args.resume_id,
        config_dict=vars(args),
    )

    def save_best_fn(policy: BasePolicy) -> None:
        torch.save(policy.state_dict(), os.path.join(log_path, "policy.pth"))

    def stop_fn(mean_rewards: float) -> bool:
        if env.spec.reward_threshold:
            return mean_rewards >= env.spec.reward_threshold
        if "Pong" in args.task:
            return mean_rewards >= 20
        return False

    def train_fn(epoch: int, env_step: int) -> None:
        # nature DQN setting, linear decay in the first 1M steps
        if env_step <= 1e6:
            eps = args.eps_train - env_step / 1e6 * (args.eps_train - args.eps_train_final)
        else:
            eps = args.eps_train_final
        policy.set_eps(eps)
        if env_step % 1000 == 0:
            logger.write("train/env_step", env_step, {"train/eps": eps})

    def test_fn(epoch: int, env_step: int | None) -> None:
        policy.set_eps(args.eps_test)

    # watch agent's performance
    def watch() -> None:
        print("Setup test envs ...")
        policy.set_eps(args.eps_test)
        test_envs.seed(args.seed)
        if args.save_buffer_name:
            print(f"Generate buffer with size {args.buffer_size}")
            buffer = VectorReplayBuffer(
                args.buffer_size,
                buffer_num=len(test_envs),
                ignore_obs_next=True,
                save_only_last_obs=True,
                stack_num=args.frames_stack,
            )
            collector = Collector(policy, test_envs, buffer, exploration_noise=True)
            result = collector.collect(n_step=args.buffer_size)
            print(f"Save buffer into {args.save_buffer_name}")
            # Unfortunately, pickle will cause oom with 1M buffer size
            buffer.save_hdf5(args.save_buffer_name)
        else:
            print("Testing agent ...")
            test_collector.reset()
            result = test_collector.collect(n_episode=args.test_num, render=args.render)
        result.pprint_asdict()

    if args.watch:
        watch()
        sys.exit(0)

    # test train_collector and start filling replay buffer
    train_collector.reset()
    train_collector.collect(n_step=args.batch_size * args.training_num)
    # trainer
    result = OffpolicyTrainer(
        policy=policy,
        train_collector=train_collector,
        test_collector=test_collector,
        max_epoch=args.epoch,
        step_per_epoch=args.step_per_epoch,
        step_per_collect=args.step_per_collect,
        episode_per_test=args.test_num,
        batch_size=args.batch_size,
        train_fn=train_fn,
        test_fn=test_fn,
        stop_fn=stop_fn,
        save_best_fn=save_best_fn,
        logger=logger,
        update_per_step=args.update_per_step,
        test_in_train=False,
    ).run()

    pprint.pprint(result)
    watch()
219
+
220
+
221
# Script entry point: parse the command line and run the QRDQN example when executed directly.
if __name__ == "__main__":
    test_qrdqn(get_args())
examples/atari/atari_rainbow.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import datetime
3
+ import os
4
+ import pprint
5
+ import sys
6
+
7
+ import numpy as np
8
+ import torch
9
+ from atari_network import Rainbow
10
+ from atari_wrapper import make_atari_env
11
+
12
+ from tianshou.data import Collector, PrioritizedVectorReplayBuffer, VectorReplayBuffer
13
+ from tianshou.highlevel.logger import LoggerFactoryDefault
14
+ from tianshou.policy import C51Policy, RainbowPolicy
15
+ from tianshou.policy.base import BasePolicy
16
+ from tianshou.trainer import OffpolicyTrainer
17
+
18
+
19
def get_args() -> argparse.Namespace:
    """Parse the command-line options of the Rainbow Atari example.

    The options are declared in a single table (flag -> ``add_argument``
    keyword options) and registered in table order, so ``--help`` lists them
    in the order written below.

    :return: the namespace produced by parsing ``sys.argv``.
    """
    fallback_device = "cuda" if torch.cuda.is_available() else "cpu"
    option_table = [
        ("--task", dict(type=str, default="PongNoFrameskip-v4")),
        ("--seed", dict(type=int, default=0)),
        ("--scale-obs", dict(type=int, default=0)),
        ("--eps-test", dict(type=float, default=0.005)),
        ("--eps-train", dict(type=float, default=1.0)),
        ("--eps-train-final", dict(type=float, default=0.05)),
        ("--buffer-size", dict(type=int, default=100000)),
        ("--lr", dict(type=float, default=0.0000625)),
        ("--gamma", dict(type=float, default=0.99)),
        ("--num-atoms", dict(type=int, default=51)),
        ("--v-min", dict(type=float, default=-10.0)),
        ("--v-max", dict(type=float, default=10.0)),
        ("--noisy-std", dict(type=float, default=0.1)),
        ("--no-dueling", dict(action="store_true", default=False)),
        ("--no-noisy", dict(action="store_true", default=False)),
        ("--no-priority", dict(action="store_true", default=False)),
        ("--alpha", dict(type=float, default=0.5)),
        ("--beta", dict(type=float, default=0.4)),
        ("--beta-final", dict(type=float, default=1.0)),
        ("--beta-anneal-step", dict(type=int, default=5000000)),
        ("--no-weight-norm", dict(action="store_true", default=False)),
        ("--n-step", dict(type=int, default=3)),
        ("--target-update-freq", dict(type=int, default=500)),
        ("--epoch", dict(type=int, default=100)),
        ("--step-per-epoch", dict(type=int, default=100000)),
        ("--step-per-collect", dict(type=int, default=10)),
        ("--update-per-step", dict(type=float, default=0.1)),
        ("--batch-size", dict(type=int, default=32)),
        ("--training-num", dict(type=int, default=10)),
        ("--test-num", dict(type=int, default=10)),
        ("--logdir", dict(type=str, default="log")),
        ("--render", dict(type=float, default=0.0)),
        ("--device", dict(type=str, default=fallback_device)),
        ("--frames-stack", dict(type=int, default=4)),
        ("--resume-path", dict(type=str, default=None)),
        ("--resume-id", dict(type=str, default=None)),
        (
            "--logger",
            dict(type=str, default="tensorboard", choices=["tensorboard", "wandb"]),
        ),
        ("--wandb-project", dict(type=str, default="atari.benchmark")),
        (
            "--watch",
            dict(
                default=False,
                action="store_true",
                help="watch the play of pre-trained policy only",
            ),
        ),
        ("--save-buffer-name", dict(type=str, default=None)),
    ]

    parser = argparse.ArgumentParser()
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    return parser.parse_args()
76
+
77
+
78
def test_rainbow(args: argparse.Namespace | None = None) -> None:
    """Train a Rainbow agent on an Atari task, or only evaluate it with ``--watch``.

    Builds the environments, the Rainbow network and policy, a (prioritized)
    replay buffer, the collectors and the logger, then runs the off-policy
    trainer and finally evaluates the resulting policy.

    :param args: parsed options as produced by :func:`get_args`. When ``None``
        (the default) the command line is parsed on entry.
    """
    # Parse lazily: a `get_args()` default value would be evaluated once, at
    # definition (import) time, and would consume the importing process's argv.
    if args is None:
        args = get_args()

    env, train_envs, test_envs = make_atari_env(
        args.task,
        args.seed,
        args.training_num,
        args.test_num,
        scale=args.scale_obs,
        frame_stack=args.frames_stack,
    )
    args.state_shape = env.observation_space.shape or env.observation_space.n
    args.action_shape = env.action_space.shape or env.action_space.n
    # should be N_FRAMES x H x W
    print("Observations shape:", args.state_shape)
    print("Actions shape:", args.action_shape)
    # seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # define model
    net = Rainbow(
        *args.state_shape,
        args.action_shape,
        args.num_atoms,
        args.noisy_std,
        args.device,
        is_dueling=not args.no_dueling,
        is_noisy=not args.no_noisy,
    )
    optim = torch.optim.Adam(net.parameters(), lr=args.lr)
    # define policy
    policy: C51Policy = RainbowPolicy(
        model=net,
        optim=optim,
        discount_factor=args.gamma,
        action_space=env.action_space,
        num_atoms=args.num_atoms,
        v_min=args.v_min,
        v_max=args.v_max,
        estimation_step=args.n_step,
        target_update_freq=args.target_update_freq,
    ).to(args.device)
    # load a previous policy
    if args.resume_path:
        policy.load_state_dict(torch.load(args.resume_path, map_location=args.device))
        print("Loaded agent from: ", args.resume_path)
    # replay buffer: `save_last_obs` and `stack_num` can be removed together
    # when you have enough RAM
    buffer: VectorReplayBuffer | PrioritizedVectorReplayBuffer
    if args.no_priority:
        buffer = VectorReplayBuffer(
            args.buffer_size,
            buffer_num=len(train_envs),
            ignore_obs_next=True,
            save_only_last_obs=True,
            stack_num=args.frames_stack,
        )
    else:
        buffer = PrioritizedVectorReplayBuffer(
            args.buffer_size,
            buffer_num=len(train_envs),
            ignore_obs_next=True,
            save_only_last_obs=True,
            stack_num=args.frames_stack,
            alpha=args.alpha,
            beta=args.beta,
            weight_norm=not args.no_weight_norm,
        )
    # collector
    train_collector = Collector(policy, train_envs, buffer, exploration_noise=True)
    test_collector = Collector(policy, test_envs, exploration_noise=True)

    # log
    now = datetime.datetime.now().strftime("%y%m%d-%H%M%S")
    args.algo_name = "rainbow"
    log_name = os.path.join(args.task, args.algo_name, str(args.seed), now)
    log_path = os.path.join(args.logdir, log_name)

    # logger
    logger_factory = LoggerFactoryDefault()
    if args.logger == "wandb":
        logger_factory.logger_type = "wandb"
        logger_factory.wandb_project = args.wandb_project
    else:
        logger_factory.logger_type = "tensorboard"

    logger = logger_factory.create_logger(
        log_dir=log_path,
        experiment_name=log_name,
        run_id=args.resume_id,
        config_dict=vars(args),
    )

    def save_best_fn(policy: BasePolicy) -> None:
        """Save the given policy's state dict as ``policy.pth`` in the log directory."""
        torch.save(policy.state_dict(), os.path.join(log_path, "policy.pth"))

    def stop_fn(mean_rewards: float) -> bool:
        """Tell the trainer whether the mean test reward is high enough to stop."""
        if env.spec.reward_threshold:
            return mean_rewards >= env.spec.reward_threshold
        if "Pong" in args.task:
            return mean_rewards >= 20
        return False

    def train_fn(epoch: int, env_step: int) -> None:
        """Anneal epsilon (and, for prioritized replay, beta) from the env step count."""
        # nature DQN setting, linear decay in the first 1M steps
        if env_step <= 1e6:
            eps = args.eps_train - env_step / 1e6 * (args.eps_train - args.eps_train_final)
        else:
            eps = args.eps_train_final
        policy.set_eps(eps)
        if env_step % 1000 == 0:
            logger.write("train/env_step", env_step, {"train/eps": eps})
        if not args.no_priority:
            # linear interpolation from `beta` to `beta_final` over `beta_anneal_step` steps
            if env_step <= args.beta_anneal_step:
                beta = args.beta - env_step / args.beta_anneal_step * (args.beta - args.beta_final)
            else:
                beta = args.beta_final
            buffer.set_beta(beta)
            if env_step % 1000 == 0:
                logger.write("train/env_step", env_step, {"train/beta": beta})

    def test_fn(epoch: int, env_step: int | None) -> None:
        """Switch the policy to the evaluation epsilon before testing."""
        policy.set_eps(args.eps_test)

    # watch agent's performance
    def watch() -> None:
        """Evaluate the policy, or fill and save a buffer when ``--save-buffer-name`` is set."""
        print("Setup test envs ...")
        policy.set_eps(args.eps_test)
        test_envs.seed(args.seed)
        if args.save_buffer_name:
            print(f"Generate buffer with size {args.buffer_size}")
            # local buffer; the training buffer of the enclosing scope is untouched
            buffer = PrioritizedVectorReplayBuffer(
                args.buffer_size,
                buffer_num=len(test_envs),
                ignore_obs_next=True,
                save_only_last_obs=True,
                stack_num=args.frames_stack,
                alpha=args.alpha,
                beta=args.beta,
            )
            collector = Collector(policy, test_envs, buffer, exploration_noise=True)
            result = collector.collect(n_step=args.buffer_size)
            print(f"Save buffer into {args.save_buffer_name}")
            # Unfortunately, pickle will cause oom with 1M buffer size
            buffer.save_hdf5(args.save_buffer_name)
        else:
            print("Testing agent ...")
            test_collector.reset()
            result = test_collector.collect(n_episode=args.test_num, render=args.render)
        result.pprint_asdict()

    if args.watch:
        watch()
        sys.exit(0)

    # test train_collector and start filling replay buffer
    train_collector.reset()
    train_collector.collect(n_step=args.batch_size * args.training_num)
    # trainer
    result = OffpolicyTrainer(
        policy=policy,
        train_collector=train_collector,
        test_collector=test_collector,
        max_epoch=args.epoch,
        step_per_epoch=args.step_per_epoch,
        step_per_collect=args.step_per_collect,
        episode_per_test=args.test_num,
        batch_size=args.batch_size,
        train_fn=train_fn,
        test_fn=test_fn,
        stop_fn=stop_fn,
        save_best_fn=save_best_fn,
        logger=logger,
        update_per_step=args.update_per_step,
        test_in_train=False,
    ).run()

    pprint.pprint(result)
    watch()
255
+
256
+
257
+ if __name__ == "__main__":
258
+ test_rainbow(get_args())
examples/atari/atari_sac.py ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import datetime
3
+ import os
4
+ import pprint
5
+ import sys
6
+
7
+ import numpy as np
8
+ import torch
9
+ from atari_network import DQN
10
+ from atari_wrapper import make_atari_env
11
+
12
+ from tianshou.data import Collector, VectorReplayBuffer
13
+ from tianshou.highlevel.logger import LoggerFactoryDefault
14
+ from tianshou.policy import DiscreteSACPolicy, ICMPolicy
15
+ from tianshou.policy.base import BasePolicy
16
+ from tianshou.trainer import OffpolicyTrainer
17
+ from tianshou.utils.net.discrete import Actor, Critic, IntrinsicCuriosityModule
18
+
19
+
20
def get_args() -> argparse.Namespace:
    """Parse the command-line options of the discrete SAC Atari example.

    The options are declared in a single table (flag -> ``add_argument``
    keyword options) and registered in table order, so ``--help`` lists them
    in the order written below.

    :return: the namespace produced by parsing ``sys.argv``.
    """
    fallback_device = "cuda" if torch.cuda.is_available() else "cpu"
    option_table = [
        ("--task", dict(type=str, default="PongNoFrameskip-v4")),
        ("--seed", dict(type=int, default=4213)),
        ("--scale-obs", dict(type=int, default=0)),
        ("--buffer-size", dict(type=int, default=100000)),
        ("--actor-lr", dict(type=float, default=1e-5)),
        ("--critic-lr", dict(type=float, default=1e-5)),
        ("--gamma", dict(type=float, default=0.99)),
        ("--n-step", dict(type=int, default=3)),
        ("--tau", dict(type=float, default=0.005)),
        ("--alpha", dict(type=float, default=0.05)),
        ("--auto-alpha", dict(action="store_true", default=False)),
        ("--alpha-lr", dict(type=float, default=3e-4)),
        ("--epoch", dict(type=int, default=100)),
        ("--step-per-epoch", dict(type=int, default=100000)),
        ("--step-per-collect", dict(type=int, default=10)),
        ("--update-per-step", dict(type=float, default=0.1)),
        ("--batch-size", dict(type=int, default=64)),
        ("--hidden-size", dict(type=int, default=512)),
        ("--training-num", dict(type=int, default=10)),
        ("--test-num", dict(type=int, default=10)),
        ("--rew-norm", dict(type=int, default=False)),
        ("--logdir", dict(type=str, default="log")),
        ("--render", dict(type=float, default=0.0)),
        ("--device", dict(type=str, default=fallback_device)),
        ("--frames-stack", dict(type=int, default=4)),
        ("--resume-path", dict(type=str, default=None)),
        ("--resume-id", dict(type=str, default=None)),
        (
            "--logger",
            dict(type=str, default="tensorboard", choices=["tensorboard", "wandb"]),
        ),
        ("--wandb-project", dict(type=str, default="atari.benchmark")),
        (
            "--watch",
            dict(
                default=False,
                action="store_true",
                help="watch the play of pre-trained policy only",
            ),
        ),
        ("--save-buffer-name", dict(type=str, default=None)),
        (
            "--icm-lr-scale",
            dict(
                type=float,
                default=0.0,
                help="use intrinsic curiosity module with this lr scale",
            ),
        ),
        (
            "--icm-reward-scale",
            dict(
                type=float,
                default=0.01,
                help="scaling factor for intrinsic curiosity reward",
            ),
        ),
        (
            "--icm-forward-loss-weight",
            dict(
                type=float,
                default=0.2,
                help="weight for the forward model loss in ICM",
            ),
        ),
    ]

    parser = argparse.ArgumentParser()
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    return parser.parse_args()
86
+
87
+
88
def test_discrete_sac(args: argparse.Namespace | None = None) -> None:
    """Train a discrete SAC agent on an Atari task, or only evaluate it with ``--watch``.

    Builds the environments, the actor and the two critics (all on one
    ``DQN`` feature network), optionally wraps the policy in an ICM policy
    when ``--icm-lr-scale`` is positive, then runs the off-policy trainer and
    finally evaluates the resulting policy.

    :param args: parsed options as produced by :func:`get_args`. When ``None``
        (the default) the command line is parsed on entry.
    """
    # Parse lazily: a `get_args()` default value would be evaluated once, at
    # definition (import) time, and would consume the importing process's argv.
    if args is None:
        args = get_args()

    env, train_envs, test_envs = make_atari_env(
        args.task,
        args.seed,
        args.training_num,
        args.test_num,
        scale=args.scale_obs,
        frame_stack=args.frames_stack,
    )
    args.state_shape = env.observation_space.shape or env.observation_space.n
    args.action_shape = env.action_space.shape or env.action_space.n
    # should be N_FRAMES x H x W
    print("Observations shape:", args.state_shape)
    print("Actions shape:", args.action_shape)
    # seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # define model
    net = DQN(
        *args.state_shape,
        args.action_shape,
        device=args.device,
        features_only=True,
        output_dim_added_layer=args.hidden_size,
    )
    actor = Actor(net, args.action_shape, device=args.device, softmax_output=False)
    actor_optim = torch.optim.Adam(actor.parameters(), lr=args.actor_lr)
    critic1 = Critic(net, last_size=args.action_shape, device=args.device)
    critic1_optim = torch.optim.Adam(critic1.parameters(), lr=args.critic_lr)
    critic2 = Critic(net, last_size=args.action_shape, device=args.device)
    critic2_optim = torch.optim.Adam(critic2.parameters(), lr=args.critic_lr)

    # define policy
    if args.auto_alpha:
        # alpha becomes a (target_entropy, log_alpha, optimizer) triple
        target_entropy = 0.98 * np.log(np.prod(args.action_shape))
        log_alpha = torch.zeros(1, requires_grad=True, device=args.device)
        alpha_optim = torch.optim.Adam([log_alpha], lr=args.alpha_lr)
        args.alpha = (target_entropy, log_alpha, alpha_optim)

    policy: DiscreteSACPolicy | ICMPolicy
    policy = DiscreteSACPolicy(
        actor=actor,
        actor_optim=actor_optim,
        critic=critic1,
        critic_optim=critic1_optim,
        critic2=critic2,
        critic2_optim=critic2_optim,
        action_space=env.action_space,
        tau=args.tau,
        gamma=args.gamma,
        alpha=args.alpha,
        estimation_step=args.n_step,
    ).to(args.device)
    if args.icm_lr_scale > 0:
        feature_net = DQN(*args.state_shape, args.action_shape, args.device, features_only=True)
        action_dim = np.prod(args.action_shape)
        feature_dim = feature_net.output_dim
        icm_net = IntrinsicCuriosityModule(
            feature_net.net,
            feature_dim,
            action_dim,
            hidden_sizes=[args.hidden_size],
            device=args.device,
        )
        icm_optim = torch.optim.Adam(icm_net.parameters(), lr=args.actor_lr)
        policy = ICMPolicy(
            policy=policy,
            model=icm_net,
            optim=icm_optim,
            action_space=env.action_space,
            lr_scale=args.icm_lr_scale,
            reward_scale=args.icm_reward_scale,
            forward_loss_weight=args.icm_forward_loss_weight,
        ).to(args.device)
    # load a previous policy
    if args.resume_path:
        policy.load_state_dict(torch.load(args.resume_path, map_location=args.device))
        print("Loaded agent from: ", args.resume_path)
    # replay buffer: `save_last_obs` and `stack_num` can be removed together
    # when you have enough RAM
    buffer = VectorReplayBuffer(
        args.buffer_size,
        buffer_num=len(train_envs),
        ignore_obs_next=True,
        save_only_last_obs=True,
        stack_num=args.frames_stack,
    )
    # collector
    train_collector = Collector(policy, train_envs, buffer, exploration_noise=True)
    test_collector = Collector(policy, test_envs, exploration_noise=True)

    # log
    now = datetime.datetime.now().strftime("%y%m%d-%H%M%S")
    args.algo_name = "discrete_sac_icm" if args.icm_lr_scale > 0 else "discrete_sac"
    log_name = os.path.join(args.task, args.algo_name, str(args.seed), now)
    log_path = os.path.join(args.logdir, log_name)

    # logger
    logger_factory = LoggerFactoryDefault()
    if args.logger == "wandb":
        logger_factory.logger_type = "wandb"
        logger_factory.wandb_project = args.wandb_project
    else:
        logger_factory.logger_type = "tensorboard"

    logger = logger_factory.create_logger(
        log_dir=log_path,
        experiment_name=log_name,
        run_id=args.resume_id,
        config_dict=vars(args),
    )

    def save_best_fn(policy: BasePolicy) -> None:
        """Save the given policy's state dict as ``policy.pth`` in the log directory."""
        torch.save(policy.state_dict(), os.path.join(log_path, "policy.pth"))

    def stop_fn(mean_rewards: float) -> bool:
        """Tell the trainer whether the mean test reward is high enough to stop."""
        if env.spec.reward_threshold:
            return mean_rewards >= env.spec.reward_threshold
        if "Pong" in args.task:
            return mean_rewards >= 20
        return False

    def save_checkpoint_fn(epoch: int, env_step: int, gradient_step: int) -> str:
        """Write the policy state dict to ``checkpoint.pth`` and return that path."""
        # see also: https://pytorch.org/tutorials/beginner/saving_loading_models.html
        ckpt_path = os.path.join(log_path, "checkpoint.pth")
        torch.save({"model": policy.state_dict()}, ckpt_path)
        return ckpt_path

    # watch agent's performance
    def watch() -> None:
        """Evaluate the policy, or fill and save a buffer when ``--save-buffer-name`` is set."""
        print("Setup test envs ...")
        test_envs.seed(args.seed)
        if args.save_buffer_name:
            print(f"Generate buffer with size {args.buffer_size}")
            # local buffer; the training buffer of the enclosing scope is untouched
            buffer = VectorReplayBuffer(
                args.buffer_size,
                buffer_num=len(test_envs),
                ignore_obs_next=True,
                save_only_last_obs=True,
                stack_num=args.frames_stack,
            )
            collector = Collector(policy, test_envs, buffer, exploration_noise=True)
            result = collector.collect(n_step=args.buffer_size)
            print(f"Save buffer into {args.save_buffer_name}")
            # Unfortunately, pickle will cause oom with 1M buffer size
            buffer.save_hdf5(args.save_buffer_name)
        else:
            print("Testing agent ...")
            test_collector.reset()
            result = test_collector.collect(n_episode=args.test_num, render=args.render)
        result.pprint_asdict()

    if args.watch:
        watch()
        sys.exit(0)

    # test train_collector and start filling replay buffer
    train_collector.reset()
    train_collector.collect(n_step=args.batch_size * args.training_num)
    # trainer
    result = OffpolicyTrainer(
        policy=policy,
        train_collector=train_collector,
        test_collector=test_collector,
        max_epoch=args.epoch,
        step_per_epoch=args.step_per_epoch,
        step_per_collect=args.step_per_collect,
        episode_per_test=args.test_num,
        batch_size=args.batch_size,
        stop_fn=stop_fn,
        save_best_fn=save_best_fn,
        logger=logger,
        update_per_step=args.update_per_step,
        test_in_train=False,
        resume_from_log=args.resume_id is not None,
        save_checkpoint_fn=save_checkpoint_fn,
    ).run()

    pprint.pprint(result)
    watch()
268
+
269
+
270
+ if __name__ == "__main__":
271
+ test_discrete_sac(get_args())
examples/atari/atari_sac_hl.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import os
4
+ from collections.abc import Sequence
5
+
6
+ from examples.atari.atari_network import (
7
+ ActorFactoryAtariDQN,
8
+ IntermediateModuleFactoryAtariDQNFeatures,
9
+ )
10
+ from examples.atari.atari_wrapper import AtariEnvFactory, AtariEpochStopCallback
11
+ from tianshou.highlevel.config import SamplingConfig
12
+ from tianshou.highlevel.experiment import (
13
+ DiscreteSACExperimentBuilder,
14
+ ExperimentConfig,
15
+ )
16
+ from tianshou.highlevel.params.alpha import AutoAlphaFactoryDefault
17
+ from tianshou.highlevel.params.policy_params import DiscreteSACParams
18
+ from tianshou.highlevel.params.policy_wrapper import (
19
+ PolicyWrapperFactoryIntrinsicCuriosity,
20
+ )
21
+ from tianshou.utils import logging
22
+ from tianshou.utils.logging import datetime_tag
23
+
24
+
25
def main(
    experiment_config: ExperimentConfig,
    task: str = "PongNoFrameskip-v4",
    scale_obs: bool = False,
    buffer_size: int = 100000,
    actor_lr: float = 1e-5,
    critic_lr: float = 1e-5,
    gamma: float = 0.99,
    n_step: int = 3,
    tau: float = 0.005,
    alpha: float = 0.05,
    auto_alpha: bool = False,
    alpha_lr: float = 3e-4,
    epoch: int = 100,
    step_per_epoch: int = 100000,
    step_per_collect: int = 10,
    update_per_step: float = 0.1,
    batch_size: int = 64,
    hidden_sizes: Sequence[int] = (512,),
    training_num: int = 10,
    test_num: int = 10,
    frames_stack: int = 4,
    save_buffer_name: str | None = None,  # TODO add support in high-level API?
    icm_lr_scale: float = 0.0,
    icm_reward_scale: float = 0.01,
    icm_forward_loss_weight: float = 0.2,
) -> None:
    """Configure and run a discrete SAC experiment on Atari via the high-level API.

    The run name is ``<task>/sac/<seed>/<datetime tag>``. An intrinsic
    curiosity policy wrapper is added only when ``icm_lr_scale`` is positive.
    ``save_buffer_name`` is accepted but not used by this function.
    """
    run_name = os.path.join(task, "sac", str(experiment_config.seed), datetime_tag())

    sampling_config = SamplingConfig(
        num_epochs=epoch,
        step_per_epoch=step_per_epoch,
        update_per_step=update_per_step,
        batch_size=batch_size,
        num_train_envs=training_num,
        num_test_envs=test_num,
        buffer_size=buffer_size,
        step_per_collect=step_per_collect,
        repeat_per_collect=None,
        replay_buffer_stack_num=frames_stack,
        replay_buffer_ignore_obs_next=True,
        replay_buffer_save_only_last_obs=True,
    )

    env_factory = AtariEnvFactory(
        task,
        sampling_config.train_seed,
        sampling_config.test_seed,
        frames_stack,
        scale=scale_obs,
    )

    # Each with_* call's return value is rebound, mirroring the fluent chain.
    builder = DiscreteSACExperimentBuilder(env_factory, experiment_config, sampling_config)
    alpha_setting = AutoAlphaFactoryDefault(lr=alpha_lr) if auto_alpha else alpha
    sac_params = DiscreteSACParams(
        actor_lr=actor_lr,
        critic1_lr=critic_lr,
        critic2_lr=critic_lr,
        gamma=gamma,
        tau=tau,
        alpha=alpha_setting,
        estimation_step=n_step,
    )
    builder = builder.with_sac_params(sac_params)
    builder = builder.with_actor_factory(
        ActorFactoryAtariDQN(scale_obs=False, features_only=True),
    )
    builder = builder.with_common_critic_factory_use_actor()
    builder = builder.with_epoch_stop_callback(AtariEpochStopCallback(task))

    if icm_lr_scale > 0:
        curiosity_wrapper = PolicyWrapperFactoryIntrinsicCuriosity(
            feature_net_factory=IntermediateModuleFactoryAtariDQNFeatures(),
            hidden_sizes=hidden_sizes,
            lr=actor_lr,
            lr_scale=icm_lr_scale,
            reward_scale=icm_reward_scale,
            forward_loss_weight=icm_forward_loss_weight,
        )
        builder.with_policy_wrapper_factory(curiosity_wrapper)

    builder.build().run(run_name=run_name)
107
+
108
+
109
+ if __name__ == "__main__":
110
+ logging.run_cli(main)
examples/atari/atari_wrapper.py ADDED
@@ -0,0 +1,469 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Borrow a lot from openai baselines:
2
+ # https://github.com/openai/baselines/blob/master/baselines/common/atari_wrappers.py
3
+ import logging
4
+ import warnings
5
+ from collections import deque
6
+ from typing import Any, SupportsFloat
7
+
8
+ import cv2
9
+ import gymnasium as gym
10
+ import numpy as np
11
+ from gymnasium import Env
12
+
13
+ from examples.atari.tianshou.env import BaseVectorEnv
14
+ from examples.atari.tianshou.highlevel.env import (
15
+ EnvFactoryRegistered,
16
+ EnvMode,
17
+ EnvPoolFactory,
18
+ VectorEnvType,
19
+ )
20
+ from examples.atari.tianshou.highlevel.trainer import EpochStopCallback, TrainingContext
21
+
22
+ envpool_is_available = True
23
+ try:
24
+ import envpool
25
+ except ImportError:
26
+ envpool_is_available = False
27
+ envpool = None
28
+ log = logging.getLogger(__name__)
29
+
30
+
31
+ def _parse_reset_result(reset_result: tuple) -> tuple[tuple, dict, bool]:
32
+ contains_info = (
33
+ isinstance(reset_result, tuple)
34
+ and len(reset_result) == 2
35
+ and isinstance(reset_result[1], dict)
36
+ )
37
+ if contains_info:
38
+ return reset_result[0], reset_result[1], contains_info
39
+ return reset_result, {}, contains_info
40
+
41
+
42
def get_space_dtype(obs_space: gym.spaces.Box) -> type[np.floating] | type[np.integer]:
    """Return the abstract numpy scalar family of ``obs_space.dtype``.

    :return: ``np.integer`` for integer dtypes, ``np.floating`` for float dtypes.
    :raises TypeError: if the dtype belongs to neither family.
    """
    # Integer is checked first, matching the original if/elif order.
    for scalar_family in (np.integer, np.floating):
        if np.issubdtype(obs_space.dtype, scalar_family):
            return scalar_family
    raise TypeError(
        f"Unsupported observation space dtype: {obs_space.dtype}. "
        f"This might be a bug in tianshou or gymnasium, please report it!",
    )
54
+
55
+
56
class NoopResetEnv(gym.Wrapper):
    """Randomise the initial state by playing a random number of no-ops on reset.

    Action 0 is assumed to be the no-op; the constructor asserts this.

    :param gym.Env env: the environment to wrap.
    :param int noop_max: the maximum value of no-ops to run.
    """

    def __init__(self, env: gym.Env, noop_max: int = 30) -> None:
        super().__init__(env)
        self.noop_max = noop_max
        self.noop_action = 0
        assert hasattr(env.unwrapped, "get_action_meanings")
        assert env.unwrapped.get_action_meanings()[0] == "NOOP"

    def reset(self, **kwargs: Any) -> tuple[Any, dict[str, Any]]:
        """Reset the wrapped env, then step the no-op between 1 and ``noop_max`` times.

        If the episode ends during the no-ops, the env is reset again (without
        ``kwargs``). Info is only passed through when the first reset returned it.
        """
        _, info, has_info = _parse_reset_result(self.env.reset(**kwargs))
        # upper bound is exclusive, hence the +1
        num_noops = self.unwrapped.np_random.integers(1, self.noop_max + 1)
        for _ in range(num_noops):
            outcome = self.env.step(self.noop_action)
            if len(outcome) != 4:
                obs, rew, term, trunc, info = outcome
                episode_over = term or trunc
            else:
                # 4-tuple step result (no truncation flag)
                obs, rew, episode_over, info = outcome  # type: ignore[unreachable] # mypy doesn't know that Gym version <0.26 has only 4 items (no truncation)
            if episode_over:
                obs, info, _ = _parse_reset_result(self.env.reset())
        return obs, (info if has_info else {})
87
+
88
+
89
class MaxAndSkipEnv(gym.Wrapper):
    """Repeat each action ``skip`` times and return the max of the last two raw frames.

    :param gym.Env env: the environment to wrap.
    :param int skip: number of `skip`-th frame.
    """

    def __init__(self, env: gym.Env, skip: int = 4) -> None:
        super().__init__(env)
        self._skip = skip

    def step(self, action: Any) -> tuple[Any, float, bool, bool, dict[str, Any]]:
        """Step the environment with the given action.

        The action is repeated up to ``skip`` times (stopping early when the
        episode ends), rewards are summed, and the returned observation is the
        element-wise max over the last two collected observations.
        """
        frames = []
        reward_sum = 0.0
        five_tuple_seen = False
        for _ in range(self._skip):
            outcome = self.env.step(action)
            if len(outcome) != 4:
                obs, reward, term, trunc, info = outcome
                done = term or trunc
                five_tuple_seen = True
            else:
                # 4-tuple step result (no truncation flag)
                obs, reward, done, info = outcome  # type: ignore[unreachable] # mypy doesn't know that Gym version <0.26 has only 4 items (no truncation)
            frames.append(obs)
            reward_sum += float(reward)
            if done:
                break
        pooled = np.max(frames[-2:], axis=0)
        if not five_tuple_seen:
            # 4-tuple results carry truncation, if at all, inside info
            return pooled, reward_sum, done, info.get("TimeLimit.truncated", False), info
        return pooled, reward_sum, term, trunc, info
125
+
126
+
127
class EpisodicLifeEnv(gym.Wrapper):
    """Treat the loss of a life as the end of an episode; really reset only on game over.

    It helps the value estimation.

    :param gym.Env env: the environment to wrap.
    """

    def __init__(self, env: gym.Env) -> None:
        super().__init__(env)
        self.lives = 0
        self.was_real_done = True
        self._return_info = False

    def step(self, action: Any) -> tuple[Any, float, bool, bool, dict[str, Any]]:
        """Step the wrapped env and flag the transition as terminal when a life is lost."""
        outcome = self.env.step(action)
        five_tuple = len(outcome) != 4
        if five_tuple:
            obs, reward, term, trunc, info = outcome
            done = term or trunc
        else:
            obs, reward, done, info = outcome  # type: ignore[unreachable] # mypy doesn't know that Gym version <0.26 has only 4 items (no truncation)
        reward = float(reward)
        # remember whether the underlying game itself ended; reset() depends on it
        self.was_real_done = done
        # check current lives, make loss of life terminal, then update lives to
        # handle bonus lives
        assert hasattr(self.env.unwrapped, "ale")
        remaining_lives = self.env.unwrapped.ale.lives()
        # for Qbert sometimes we stay in lives == 0 condition for a few
        # frames, so its important to keep lives > 0, so that we only reset
        # once the environment is actually done.
        life_lost = 0 < remaining_lives < self.lives
        if life_lost:
            done = True
            term = True
        self.lives = remaining_lives
        if not five_tuple:
            return obs, reward, done, info.get("TimeLimit.truncated", False), info
        return obs, reward, term, trunc, info

    def reset(self, **kwargs: Any) -> tuple[Any, dict[str, Any]]:
        """Reset the wrapped env only when the last episode ended for real.

        After a mere life loss a single no-op step (action 0) is taken instead,
        so the game continues from where it was. Info is passed through only
        if the most recent real reset returned it.
        """
        if not self.was_real_done:
            # no-op step to advance from terminal/lost life state
            outcome = self.env.step(0)
            obs, info = outcome[0], outcome[-1]
        else:
            obs, info, self._return_info = _parse_reset_result(self.env.reset(**kwargs))
        assert hasattr(self.env.unwrapped, "ale")
        self.lives = self.env.unwrapped.ale.lives()
        return obs, (info if self._return_info else {})
184
+
185
+
186
class FireResetEnv(gym.Wrapper):
    """Take action on reset for environments that are fixed until firing.

    Related discussion: https://github.com/openai/baselines/issues/240.

    :param gym.Env env: the environment to wrap. Its unwrapped env must expose
        ``get_action_meanings()`` with at least three actions, the second of
        which (index 1) is ``"FIRE"``.
    """

    def __init__(self, env: gym.Env) -> None:
        super().__init__(env)
        assert hasattr(env.unwrapped, "get_action_meanings")
        action_meanings = env.unwrapped.get_action_meanings()
        # Check the length before indexing so that a too-short action set fails
        # the assertion instead of raising an IndexError on the lookup below.
        assert len(action_meanings) >= 3
        assert action_meanings[1] == "FIRE"

    def reset(self, **kwargs: Any) -> tuple[Any, dict]:
        """Reset the wrapped env, then press FIRE (action 1) once.

        :return: the observation produced by the FIRE step together with that
            step's info dict; the info dict is empty when the underlying reset
            did not provide one (presumably what the third value returned by
            ``_parse_reset_result`` signals -- confirm against its definition).
        """
        _, _, return_info = _parse_reset_result(self.env.reset(**kwargs))
        step_result = self.env.step(1)
        # The info dict is the last item of both the 4- and 5-item step results.
        obs, info = step_result[0], step_result[-1]
        return (obs, info) if return_info else (obs, {})
204
+
205
+
206
class WarpFrame(gym.ObservationWrapper):
    """Convert frames to grayscale and resize them to 84x84.

    This follows the preprocessing of the Nature paper and later work.

    :param gym.Env env: the environment to wrap.
    """

    def __init__(self, env: gym.Env) -> None:
        super().__init__(env)
        # side length (in pixels) of the square output frame
        self.size = 84
        source_space = env.observation_space
        assert isinstance(source_space, gym.spaces.Box)
        dtype = get_space_dtype(source_space)
        # the new space keeps the global bounds and dtype of the wrapped one
        lowest = np.min(source_space.low)
        highest = np.max(source_space.high)
        self.observation_space = gym.spaces.Box(
            low=lowest,
            high=highest,
            shape=(self.size, self.size),
            dtype=dtype,
        )

    def observation(self, frame: np.ndarray) -> np.ndarray:
        """Return the grayscale, resized version of an RGB frame."""
        gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        target_size = (self.size, self.size)
        return cv2.resize(gray, target_size, interpolation=cv2.INTER_AREA)
229
+
230
+
231
class ScaledFloatFrame(gym.ObservationWrapper):
    """Normalize observations to 0~1.

    Observations are shifted by the smallest lower bound of the wrapped
    observation space and divided by the span between its largest upper bound
    and that lower bound.

    :param gym.Env env: the environment to wrap; its observation space must be
        a ``gym.spaces.Box``.
    """

    def __init__(self, env: gym.Env) -> None:
        super().__init__(env)
        obs_space = env.observation_space
        assert isinstance(obs_space, gym.spaces.Box)
        low = np.min(obs_space.low)
        high = np.max(obs_space.high)
        # offset subtracted from, and range dividing, every observation
        self.bias = low
        self.scale = high - low
        self.observation_space = gym.spaces.Box(
            low=0.0,
            high=1.0,
            shape=obs_space.shape,
            dtype=np.float32,
        )

    def observation(self, observation: np.ndarray) -> np.ndarray:
        """Return the observation rescaled to [0, 1] as ``float32``."""
        scaled = (observation - self.bias) / self.scale
        # The division promotes integer frames to float64; cast so that the
        # result matches the float32 dtype declared in `observation_space`.
        return scaled.astype(np.float32)
254
+
255
+
256
class ClipRewardEnv(gym.RewardWrapper):
    """Clips the reward to {+1, 0, -1} by its sign.

    :param gym.Env env: the environment to wrap.
    """

    def __init__(self, env: gym.Env) -> None:
        super().__init__(env)
        self.reward_range = (-1, 1)

    def reward(self, reward: SupportsFloat) -> float:
        """Bin reward to {+1, 0, -1} by its sign. Note: np.sign(0) == 0.

        :return: the sign of the reward as a builtin ``float`` (``np.sign``
            yields a NumPy floating-point scalar, never an ``int``).
        """
        return float(np.sign(float(reward)))
269
+
270
+
271
class FrameStack(gym.Wrapper):
    """Return the ``n_frames`` most recent frames stacked along a new first axis.

    :param gym.Env env: the environment to wrap.
    :param int n_frames: the number of frames to stack.
    """

    def __init__(self, env: gym.Env, n_frames: int) -> None:
        super().__init__(env)
        self.n_frames: int = n_frames
        # bounded buffer: appending a new frame evicts the oldest one
        self.frames: deque[tuple[Any, ...]] = deque(maxlen=n_frames)
        obs_space = env.observation_space
        frame_shape = obs_space.shape
        assert frame_shape is not None
        stacked_shape = (n_frames, *frame_shape)
        assert isinstance(obs_space, gym.spaces.Box)
        dtype = get_space_dtype(obs_space)
        lowest = np.min(obs_space.low)
        highest = np.max(obs_space.high)
        self.observation_space = gym.spaces.Box(
            low=lowest,
            high=highest,
            shape=stacked_shape,
            dtype=dtype,
        )

    def reset(self, **kwargs: Any) -> tuple[np.ndarray, dict]:
        """Reset the wrapped env and fill the whole buffer with the first frame."""
        obs, info, return_info = _parse_reset_result(self.env.reset(**kwargs))
        self.frames.extend([obs] * self.n_frames)
        stacked = self._get_ob()
        if return_info:
            return stacked, info
        return stacked, {}

    def step(self, action: Any) -> tuple[np.ndarray, float, bool, bool, dict[str, Any]]:
        """Step the wrapped env and return the updated frame stack.

        Both the 4-item and the 5-item step results are accepted; the value
        returned always has five items.
        """
        result = self.env.step(action)
        done: bool
        if len(result) == 4:
            obs, reward, done, info = result  # type: ignore[unreachable] # mypy doesn't know that Gym version <0.26 has only 4 items (no truncation)
            uses_new_api = False
        else:
            obs, reward, term, trunc, info = result
            uses_new_api = True
        self.frames.append(obs)
        reward = float(reward)
        if not uses_new_api:
            return self._get_ob(), reward, done, info.get("TimeLimit.truncated", False), info
        return self._get_ob(), reward, term, trunc, info

    def _get_ob(self) -> np.ndarray:
        """Stack the buffered frames into a single array along axis 0."""
        # the original wrapper use `LazyFrames` but since we use np buffer,
        # it has no effect
        return np.stack(self.frames, axis=0)
320
+
321
+
322
def wrap_deepmind(
    env: gym.Env,
    episode_life: bool = True,
    clip_rewards: bool = True,
    frame_stack: int = 4,
    scale: bool = False,
    warp_frame: bool = True,
) -> (
    MaxAndSkipEnv
    | EpisodicLifeEnv
    | FireResetEnv
    | WarpFrame
    | ScaledFloatFrame
    | ClipRewardEnv
    | FrameStack
):
    """Configure environment for DeepMind-style Atari.

    ``NoopResetEnv`` (``noop_max=30``) and ``MaxAndSkipEnv`` (``skip=4``) are
    always applied. The optional wrappers follow in this order: episodic life,
    fire-on-reset (only if the action set contains "FIRE"), frame warping,
    observation scaling, reward clipping and frame stacking.

    The observation is channel-first: (c, h, w) instead of (h, w, c).

    :param env: the Atari environment to wrap.
    :param bool episode_life: wrap the episode life wrapper.
    :param bool clip_rewards: wrap the reward clipping wrapper.
    :param int frame_stack: wrap the frame stacking wrapper.
    :param bool scale: wrap the scaling observation wrapper.
    :param bool warp_frame: wrap the grayscale + resize observation wrapper.
    :return: the wrapped atari environment.
    """
    base_env = MaxAndSkipEnv(NoopResetEnv(env, noop_max=30), skip=4)
    assert hasattr(base_env.unwrapped, "get_action_meanings")  # for mypy

    result: (
        MaxAndSkipEnv
        | EpisodicLifeEnv
        | FireResetEnv
        | WarpFrame
        | ScaledFloatFrame
        | ClipRewardEnv
        | FrameStack
    ) = base_env
    if episode_life:
        result = EpisodicLifeEnv(result)
    if "FIRE" in base_env.unwrapped.get_action_meanings():
        result = FireResetEnv(result)
    if warp_frame:
        result = WarpFrame(result)
    if scale:
        result = ScaledFloatFrame(result)
    if clip_rewards:
        result = ClipRewardEnv(result)
    # a frame_stack of 0 disables stacking
    if frame_stack:
        result = FrameStack(result, frame_stack)
    return result
370
+
371
+
372
def make_atari_env(
    task: str,
    seed: int,
    training_num: int,
    test_num: int,
    scale: int | bool = False,
    frame_stack: int = 4,
) -> tuple[Env, BaseVectorEnv, BaseVectorEnv]:
    """Create a single Atari env plus vectorized training and test envs.

    If EnvPool is installed, it will automatically switch to EnvPool's Atari env.

    :param task: the task name handed to :class:`AtariEnvFactory`.
    :param seed: the training seed; the test seed is ``seed + training_num``.
    :param training_num: the number of training environments.
    :param test_num: the number of test environments.
    :param scale: whether to scale observations; converted to ``bool``.
    :param frame_stack: the number of frames to stack.
    :return: a tuple of (single env, training envs, test envs).
    """
    factory = AtariEnvFactory(
        task,
        seed,
        seed + training_num,
        frame_stack,
        scale=bool(scale),
    )
    created = factory.create_envs(training_num, test_num)
    return created.env, created.train_envs, created.test_envs
389
+
390
+
391
class AtariEnvFactory(EnvFactoryRegistered):
    """Factory for Atari environments wrapped with :func:`wrap_deepmind`.

    :param task: the task name; must contain "NoFrameskip".
    :param train_seed: seed forwarded to the base factory for training envs.
    :param test_seed: seed forwarded to the base factory for test envs.
    :param frame_stack: the number of frames to stack.
    :param scale: whether to apply the observation scaling wrapper.
    :param use_envpool_if_available: whether to create an envpool-based
        factory when ``envpool_is_available`` is true.
    :param venv_type: the vector env type forwarded to the base factory.
    """

    def __init__(
        self,
        task: str,
        train_seed: int,
        test_seed: int,
        frame_stack: int,
        scale: bool = False,
        use_envpool_if_available: bool = True,
        venv_type: VectorEnvType = VectorEnvType.SUBPROC_SHARED_MEM_AUTO,
    ) -> None:
        assert "NoFrameskip" in task
        # These must be set before EnvPoolFactoryAtari is created below, since
        # its constructor reads `scale` from this (parent) factory.
        self.frame_stack = frame_stack
        self.scale = scale
        envpool_factory = None
        if use_envpool_if_available:
            if envpool_is_available:
                envpool_factory = self.EnvPoolFactoryAtari(self)
                log.info("Using envpool, because it is available")
            else:
                log.info("Not using envpool, because it is not available")
        super().__init__(
            task=task,
            train_seed=train_seed,
            test_seed=test_seed,
            venv_type=venv_type,
            envpool_factory=envpool_factory,
        )

    def create_env(self, mode: EnvMode) -> gym.Env:
        """Create the base env and apply the DeepMind-style wrappers.

        Episodic life and reward clipping are enabled in training mode only.
        """
        env = super().create_env(mode)
        is_train = mode == EnvMode.TRAIN
        return wrap_deepmind(
            env,
            episode_life=is_train,
            clip_rewards=is_train,
            frame_stack=self.frame_stack,
            scale=self.scale,
        )

    class EnvPoolFactoryAtari(EnvPoolFactory):
        """Atari-specific envpool creation.

        Since envpool internally handles the functions that are implemented through the wrappers in `wrap_deepmind`,
        it sets the creation keyword arguments accordingly.
        """

        def __init__(self, parent: "AtariEnvFactory") -> None:
            self.parent = parent
            if self.parent.scale:
                warnings.warn(
                    "EnvPool does not include ScaledFloatFrame wrapper, "
                    "please compensate by scaling inside your network's forward function (e.g. `x = x / 255.0` for Atari)",
                )

        def _transform_task(self, task: str) -> str:
            """Rewrite the "NoFrameskip-v4" suffix of the task name to "-v5"."""
            task = super()._transform_task(task)
            # TODO: Maybe warn user, explain why this is needed
            return task.replace("NoFrameskip-v4", "-v5")

        def _transform_kwargs(self, kwargs: dict, mode: EnvMode) -> dict:
            """Add reward clipping, episodic life and frame stacking settings.

            Reward clipping and episodic life are enabled in training mode only.
            """
            kwargs = super()._transform_kwargs(kwargs, mode)
            is_train = mode == EnvMode.TRAIN
            kwargs["reward_clip"] = is_train
            kwargs["episodic_life"] = is_train
            kwargs["stack_num"] = self.parent.frame_stack
            return kwargs
457
+
458
+
459
class AtariEpochStopCallback(EpochStopCallback):
    """Stop training once the mean reward reaches the task's target.

    :param task: the task name; used for the Pong-specific fallback threshold.
    """

    def __init__(self, task: str) -> None:
        self.task = task

    def should_stop(self, mean_rewards: float, context: TrainingContext) -> bool:
        """Decide whether training should stop.

        The reward threshold of the env spec is used when one is defined;
        otherwise Pong tasks stop at a mean reward of 20 and all other tasks
        never stop.

        :param mean_rewards: the current mean reward.
        :param context: the training context providing the environment.
        :return: whether the stopping criterion is met.
        """
        env = context.envs.env
        spec = env.spec
        # Compare against None explicitly: a threshold of 0 is a valid target
        # and must not be mistaken for "no threshold defined".
        if spec is not None and spec.reward_threshold is not None:
            return mean_rewards >= spec.reward_threshold
        if "Pong" in self.task:
            return mean_rewards >= 20
        return False
examples/atari/benchmark/BreakoutNoFrameskip-v4/result.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"env_step": 0, "rew": 1.489999993145466, "rew_std": 1.4842169361904596, "Agent": "c51"}, {"env_step": 100000, "rew": 7.160000157356262, "rew_std": 3.2809146348741707, "Agent": "c51"}, {"env_step": 200000, "rew": 17.44999990463257, "rew_std": 3.923327599934239, "Agent": "c51"}, {"env_step": 300000, "rew": 17.149999713897706, "rew_std": 4.067001119498604, "Agent": "c51"}, {"env_step": 400000, "rew": 25.739999961853027, "rew_std": 3.0522777522079583, "Agent": "c51"}, {"env_step": 500000, "rew": 31.940000152587892, "rew_std": 6.872292668284413, "Agent": "c51"}, {"env_step": 600000, "rew": 36.10000019073486, "rew_std": 4.652311404432131, "Agent": "c51"}, {"env_step": 700000, "rew": 44.069999885559085, "rew_std": 8.63215520480072, "Agent": "c51"}, {"env_step": 800000, "rew": 54.52999992370606, "rew_std": 6.331357911285241, "Agent": "c51"}, {"env_step": 900000, "rew": 75.43000068664551, "rew_std": 22.26365004429836, "Agent": "c51"}, {"env_step": 1000000, "rew": 73.68000030517578, "rew_std": 23.26932736029952, "Agent": "c51"}, {"env_step": 1100000, "rew": 116.59999771118164, "rew_std": 33.29786741329967, "Agent": "c51"}, {"env_step": 1200000, "rew": 118.80000152587891, "rew_std": 24.21891832224786, "Agent": "c51"}, {"env_step": 1300000, "rew": 149.72999954223633, "rew_std": 34.80965616483245, "Agent": "c51"}, {"env_step": 1400000, "rew": 155.19000167846679, "rew_std": 59.97181996860956, "Agent": "c51"}, {"env_step": 1500000, "rew": 227.85000076293946, "rew_std": 74.72283777482421, "Agent": "c51"}, {"env_step": 1600000, "rew": 224.7099994659424, "rew_std": 81.33929570370834, "Agent": "c51"}, {"env_step": 1700000, "rew": 269.3300064086914, "rew_std": 39.881227624530716, "Agent": "c51"}, {"env_step": 1800000, "rew": 294.8, "rew_std": 48.58571966090773, "Agent": "c51"}, {"env_step": 1900000, "rew": 299.05999450683595, "rew_std": 40.756084489018896, "Agent": "c51"}, {"env_step": 2000000, "rew": 306.51000213623047, "rew_std": 39.86726833847238, "Agent": "c51"}, {"env_step": 
2100000, "rew": 277.07000122070315, "rew_std": 52.86110130603287, "Agent": "c51"}, {"env_step": 2200000, "rew": 329.5400024414063, "rew_std": 32.50145093140574, "Agent": "c51"}, {"env_step": 2300000, "rew": 337.4499984741211, "rew_std": 44.79243428344727, "Agent": "c51"}, {"env_step": 2400000, "rew": 364.02000122070314, "rew_std": 18.57825612929554, "Agent": "c51"}, {"env_step": 2500000, "rew": 321.4100006103516, "rew_std": 43.22805389464291, "Agent": "c51"}, {"env_step": 2600000, "rew": 361.55, "rew_std": 27.54516925199673, "Agent": "c51"}, {"env_step": 2700000, "rew": 333.14000244140624, "rew_std": 47.81977169887054, "Agent": "c51"}, {"env_step": 2800000, "rew": 322.52000579833987, "rew_std": 76.32684991482328, "Agent": "c51"}, {"env_step": 2900000, "rew": 330.7200012207031, "rew_std": 64.05682960403955, "Agent": "c51"}, {"env_step": 3000000, "rew": 365.2100006103516, "rew_std": 15.082475188932436, "Agent": "c51"}, {"env_step": 3100000, "rew": 355.52999725341795, "rew_std": 49.67731822769064, "Agent": "c51"}, {"env_step": 3200000, "rew": 365.6499969482422, "rew_std": 50.31155528215873, "Agent": "c51"}, {"env_step": 3300000, "rew": 346.89000091552737, "rew_std": 37.82696142293252, "Agent": "c51"}, {"env_step": 3400000, "rew": 337.4800048828125, "rew_std": 62.96080895743974, "Agent": "c51"}, {"env_step": 3500000, "rew": 362.2200012207031, "rew_std": 35.653072714280874, "Agent": "c51"}, {"env_step": 3600000, "rew": 333.90000305175784, "rew_std": 81.13762513523231, "Agent": "c51"}, {"env_step": 3700000, "rew": 376.25, "rew_std": 31.655335448058853, "Agent": "c51"}, {"env_step": 3800000, "rew": 362.44000244140625, "rew_std": 18.283176431620095, "Agent": "c51"}, {"env_step": 3900000, "rew": 366.27000427246094, "rew_std": 35.04446188070728, "Agent": "c51"}, {"env_step": 4000000, "rew": 382.57000122070315, "rew_std": 14.872595676225728, "Agent": "c51"}, {"env_step": 4100000, "rew": 359.1599945068359, "rew_std": 31.658272094999887, "Agent": "c51"}, {"env_step": 4200000, 
"rew": 360.13999938964844, "rew_std": 25.854099915610277, "Agent": "c51"}, {"env_step": 4300000, "rew": 358.4100006103516, "rew_std": 45.640715079962746, "Agent": "c51"}, {"env_step": 4400000, "rew": 375.90000305175784, "rew_std": 31.782793945259574, "Agent": "c51"}, {"env_step": 4500000, "rew": 357.9, "rew_std": 39.46456467194559, "Agent": "c51"}, {"env_step": 4600000, "rew": 403.4100036621094, "rew_std": 37.36884707172226, "Agent": "c51"}, {"env_step": 4700000, "rew": 375.38999938964844, "rew_std": 24.63223304182013, "Agent": "c51"}, {"env_step": 4800000, "rew": 345.60999908447263, "rew_std": 64.29045939613836, "Agent": "c51"}, {"env_step": 4900000, "rew": 369.19000244140625, "rew_std": 40.53828808279498, "Agent": "c51"}, {"env_step": 5000000, "rew": 328.8799976348877, "rew_std": 98.52224821427203, "Agent": "c51"}, {"env_step": 5100000, "rew": 385.2899993896484, "rew_std": 33.74096429736962, "Agent": "c51"}, {"env_step": 5200000, "rew": 378.63999938964844, "rew_std": 23.966530681765192, "Agent": "c51"}, {"env_step": 5300000, "rew": 358.99000244140626, "rew_std": 35.26937456390392, "Agent": "c51"}, {"env_step": 5400000, "rew": 367.1299987792969, "rew_std": 37.816190250907496, "Agent": "c51"}, {"env_step": 5500000, "rew": 374.85, "rew_std": 48.78137438800481, "Agent": "c51"}, {"env_step": 5600000, "rew": 396.31000366210935, "rew_std": 16.84918405865486, "Agent": "c51"}, {"env_step": 5700000, "rew": 392.25, "rew_std": 17.89766911339113, "Agent": "c51"}, {"env_step": 5800000, "rew": 371.0399993896484, "rew_std": 39.75309508083556, "Agent": "c51"}, {"env_step": 5900000, "rew": 361.20999908447266, "rew_std": 66.4414985994371, "Agent": "c51"}, {"env_step": 6000000, "rew": 379.6600006103516, "rew_std": 33.6160148677817, "Agent": "c51"}, {"env_step": 6100000, "rew": 376.65, "rew_std": 38.38656729834399, "Agent": "c51"}, {"env_step": 6200000, "rew": 401.14000244140624, "rew_std": 25.221350146221273, "Agent": "c51"}, {"env_step": 6300000, "rew": 373.94000244140625, 
"rew_std": 43.70732540398474, "Agent": "c51"}, {"env_step": 6400000, "rew": 383.6600006103516, "rew_std": 21.5665598368943, "Agent": "c51"}, {"env_step": 6500000, "rew": 396.2099945068359, "rew_std": 23.735648538855735, "Agent": "c51"}, {"env_step": 6600000, "rew": 379.5899993896484, "rew_std": 37.21383152538017, "Agent": "c51"}, {"env_step": 6700000, "rew": 369.11000366210936, "rew_std": 33.04576961471482, "Agent": "c51"}, {"env_step": 6800000, "rew": 384.3999969482422, "rew_std": 43.737378807281985, "Agent": "c51"}, {"env_step": 6900000, "rew": 392.86000366210936, "rew_std": 25.008686655391916, "Agent": "c51"}, {"env_step": 7000000, "rew": 388.18999938964845, "rew_std": 41.67885269592273, "Agent": "c51"}, {"env_step": 7100000, "rew": 400.7399963378906, "rew_std": 31.629359514939143, "Agent": "c51"}, {"env_step": 7200000, "rew": 392.60999755859376, "rew_std": 31.733938484043435, "Agent": "c51"}, {"env_step": 7300000, "rew": 381.43999633789065, "rew_std": 28.448867547263603, "Agent": "c51"}, {"env_step": 7400000, "rew": 383.7099975585937, "rew_std": 27.319865345659487, "Agent": "c51"}, {"env_step": 7500000, "rew": 400.85, "rew_std": 24.355008972204978, "Agent": "c51"}, {"env_step": 7600000, "rew": 370.6299987792969, "rew_std": 61.18848259333839, "Agent": "c51"}, {"env_step": 7700000, "rew": 377.75999755859374, "rew_std": 29.30734371334747, "Agent": "c51"}, {"env_step": 7800000, "rew": 366.9, "rew_std": 61.976462247217256, "Agent": "c51"}, {"env_step": 7900000, "rew": 381.92999877929685, "rew_std": 42.68440563855683, "Agent": "c51"}, {"env_step": 8000000, "rew": 389.1, "rew_std": 30.273154962392713, "Agent": "c51"}, {"env_step": 8100000, "rew": 377.89000244140624, "rew_std": 64.2143809894212, "Agent": "c51"}, {"env_step": 8200000, "rew": 369.98999786376953, "rew_std": 51.384270079498556, "Agent": "c51"}, {"env_step": 8300000, "rew": 363.1800018310547, "rew_std": 53.85413212986565, "Agent": "c51"}, {"env_step": 8400000, "rew": 402.32000122070315, "rew_std": 
47.67640870874529, "Agent": "c51"}, {"env_step": 8500000, "rew": 412.0899993896484, "rew_std": 49.33683150503912, "Agent": "c51"}, {"env_step": 8600000, "rew": 362.84000396728516, "rew_std": 95.5870313016572, "Agent": "c51"}, {"env_step": 8700000, "rew": 374.8699951171875, "rew_std": 35.3107884710375, "Agent": "c51"}, {"env_step": 8800000, "rew": 387.94000244140625, "rew_std": 45.46937780145519, "Agent": "c51"}, {"env_step": 8900000, "rew": 390.63999938964844, "rew_std": 27.526135712106345, "Agent": "c51"}, {"env_step": 9000000, "rew": 389.9, "rew_std": 36.79899479363974, "Agent": "c51"}, {"env_step": 9100000, "rew": 401.82000122070315, "rew_std": 28.145932045294533, "Agent": "c51"}, {"env_step": 9200000, "rew": 378.2200042724609, "rew_std": 44.36847920124076, "Agent": "c51"}, {"env_step": 9300000, "rew": 378.5300048828125, "rew_std": 20.175735548110744, "Agent": "c51"}, {"env_step": 9400000, "rew": 411.37000427246096, "rew_std": 26.821631968299695, "Agent": "c51"}, {"env_step": 9500000, "rew": 398.6599945068359, "rew_std": 36.35720540569888, "Agent": "c51"}, {"env_step": 9600000, "rew": 386.3699981689453, "rew_std": 28.54824021627022, "Agent": "c51"}, {"env_step": 9700000, "rew": 389.52000122070314, "rew_std": 61.006069637114855, "Agent": "c51"}, {"env_step": 9800000, "rew": 412.8700012207031, "rew_std": 35.838668839394245, "Agent": "c51"}, {"env_step": 9900000, "rew": 399.7899993896484, "rew_std": 36.742630957118074, "Agent": "c51"}, {"env_step": 10000000, "rew": 393.1800048828125, "rew_std": 20.823774222066966, "Agent": "c51"}, {"env_step": 0, "rew": 2.3299999952316286, "rew_std": 1.0705605633034625, "Agent": "dqn"}, {"env_step": 100000, "rew": 7.849999904632568, "rew_std": 1.8200273963417377, "Agent": "dqn"}, {"env_step": 200000, "rew": 17.30000009536743, "rew_std": 9.143740794241733, "Agent": "dqn"}, {"env_step": 300000, "rew": 20.85, "rew_std": 4.622391089254499, "Agent": "dqn"}, {"env_step": 400000, "rew": 20.7, "rew_std": 5.0768098447216925, "Agent": 
"dqn"}, {"env_step": 500000, "rew": 25.080000114440917, "rew_std": 3.1577840761532077, "Agent": "dqn"}, {"env_step": 600000, "rew": 28.130000114440918, "rew_std": 3.998262178859403, "Agent": "dqn"}, {"env_step": 700000, "rew": 35.89000053405762, "rew_std": 6.575477441314717, "Agent": "dqn"}, {"env_step": 800000, "rew": 40.44000015258789, "rew_std": 3.872260440437905, "Agent": "dqn"}, {"env_step": 900000, "rew": 39.329999923706055, "rew_std": 8.570536398642876, "Agent": "dqn"}, {"env_step": 1000000, "rew": 41.73000030517578, "rew_std": 9.01854193615441, "Agent": "dqn"}, {"env_step": 1100000, "rew": 46.03999996185303, "rew_std": 13.705852687158178, "Agent": "dqn"}, {"env_step": 1200000, "rew": 46.45000019073486, "rew_std": 8.782511298663483, "Agent": "dqn"}, {"env_step": 1300000, "rew": 47.709999084472656, "rew_std": 8.820708473452349, "Agent": "dqn"}, {"env_step": 1400000, "rew": 53.69000015258789, "rew_std": 8.472597260377047, "Agent": "dqn"}, {"env_step": 1500000, "rew": 56.78999996185303, "rew_std": 19.832672991347415, "Agent": "dqn"}, {"env_step": 1600000, "rew": 47.09000072479248, "rew_std": 14.559289729405599, "Agent": "dqn"}, {"env_step": 1700000, "rew": 51.7899995803833, "rew_std": 15.683395606454836, "Agent": "dqn"}, {"env_step": 1800000, "rew": 49.360000419616696, "rew_std": 14.638525024332282, "Agent": "dqn"}, {"env_step": 1900000, "rew": 52.28000049591064, "rew_std": 13.878459272042756, "Agent": "dqn"}, {"env_step": 2000000, "rew": 50.32000026702881, "rew_std": 12.656760859312767, "Agent": "dqn"}, {"env_step": 2100000, "rew": 50.69000015258789, "rew_std": 7.424345019289965, "Agent": "dqn"}, {"env_step": 2200000, "rew": 57.699999237060545, "rew_std": 26.776631520034286, "Agent": "dqn"}, {"env_step": 2300000, "rew": 51.079999923706055, "rew_std": 16.490165999706548, "Agent": "dqn"}, {"env_step": 2400000, "rew": 56.36000061035156, "rew_std": 10.570827964178985, "Agent": "dqn"}, {"env_step": 2500000, "rew": 61.59000091552734, "rew_std": 20.03838606299226, 
"Agent": "dqn"}, {"env_step": 2600000, "rew": 49.81999969482422, "rew_std": 11.181932063887603, "Agent": "dqn"}, {"env_step": 2700000, "rew": 50.91000061035156, "rew_std": 6.640550189836024, "Agent": "dqn"}, {"env_step": 2800000, "rew": 51.87000007629395, "rew_std": 16.15952039351572, "Agent": "dqn"}, {"env_step": 2900000, "rew": 55.97000026702881, "rew_std": 12.618800305474606, "Agent": "dqn"}, {"env_step": 3000000, "rew": 62.70000076293945, "rew_std": 15.349006905504705, "Agent": "dqn"}, {"env_step": 3100000, "rew": 62.3, "rew_std": 7.967809614396711, "Agent": "dqn"}, {"env_step": 3200000, "rew": 57.64999961853027, "rew_std": 14.362815684542023, "Agent": "dqn"}, {"env_step": 3300000, "rew": 63.35, "rew_std": 17.445471857828533, "Agent": "dqn"}, {"env_step": 3400000, "rew": 61.999999618530275, "rew_std": 9.367175853731641, "Agent": "dqn"}, {"env_step": 3500000, "rew": 58.58999938964844, "rew_std": 7.702006216494004, "Agent": "dqn"}, {"env_step": 3600000, "rew": 59.83000030517578, "rew_std": 11.262597633065775, "Agent": "dqn"}, {"env_step": 3700000, "rew": 66.31999969482422, "rew_std": 12.887497638750386, "Agent": "dqn"}, {"env_step": 3800000, "rew": 60.06000022888183, "rew_std": 8.369014666496854, "Agent": "dqn"}, {"env_step": 3900000, "rew": 68.73999938964843, "rew_std": 16.99924625009274, "Agent": "dqn"}, {"env_step": 4000000, "rew": 65.21999893188476, "rew_std": 13.805780604536238, "Agent": "dqn"}, {"env_step": 4100000, "rew": 65.03999938964844, "rew_std": 24.91546420245774, "Agent": "dqn"}, {"env_step": 4200000, "rew": 57.69999961853027, "rew_std": 19.0262977849217, "Agent": "dqn"}, {"env_step": 4300000, "rew": 72.17000045776368, "rew_std": 17.75004520954237, "Agent": "dqn"}, {"env_step": 4400000, "rew": 69.8600009918213, "rew_std": 6.844588638105477, "Agent": "dqn"}, {"env_step": 4500000, "rew": 61.260000228881836, "rew_std": 11.948741149739522, "Agent": "dqn"}, {"env_step": 4600000, "rew": 67.55, "rew_std": 8.807752424830234, "Agent": "dqn"}, {"env_step": 
4700000, "rew": 70.23999977111816, "rew_std": 8.630087095950666, "Agent": "dqn"}, {"env_step": 4800000, "rew": 72.41999969482421, "rew_std": 12.884859861619276, "Agent": "dqn"}, {"env_step": 4900000, "rew": 73.97000045776367, "rew_std": 13.834816048918906, "Agent": "dqn"}, {"env_step": 5000000, "rew": 74.23000030517578, "rew_std": 13.696208341395574, "Agent": "dqn"}, {"env_step": 5100000, "rew": 67.39999885559082, "rew_std": 15.599165755242288, "Agent": "dqn"}, {"env_step": 5200000, "rew": 85.09999961853028, "rew_std": 31.04148759266113, "Agent": "dqn"}, {"env_step": 5300000, "rew": 69.8799991607666, "rew_std": 17.921539788764512, "Agent": "dqn"}, {"env_step": 5400000, "rew": 73.52999992370606, "rew_std": 21.51474045621979, "Agent": "dqn"}, {"env_step": 5500000, "rew": 77.47999992370606, "rew_std": 13.142054243069717, "Agent": "dqn"}, {"env_step": 5600000, "rew": 81.1799991607666, "rew_std": 20.257827947382573, "Agent": "dqn"}, {"env_step": 5700000, "rew": 91.23000030517578, "rew_std": 32.801493962015556, "Agent": "dqn"}, {"env_step": 5800000, "rew": 82.73000068664551, "rew_std": 19.65411187086091, "Agent": "dqn"}, {"env_step": 5900000, "rew": 81.49000053405761, "rew_std": 18.301501103980517, "Agent": "dqn"}, {"env_step": 6000000, "rew": 81.15999946594238, "rew_std": 17.353743105571674, "Agent": "dqn"}, {"env_step": 6100000, "rew": 78.85, "rew_std": 23.848828094928958, "Agent": "dqn"}, {"env_step": 6200000, "rew": 73.11000022888183, "rew_std": 16.69673268597132, "Agent": "dqn"}, {"env_step": 6300000, "rew": 91.60000190734863, "rew_std": 27.26793083658477, "Agent": "dqn"}, {"env_step": 6400000, "rew": 91.17999992370605, "rew_std": 17.885123036862367, "Agent": "dqn"}, {"env_step": 6500000, "rew": 85.3, "rew_std": 27.91558722418007, "Agent": "dqn"}, {"env_step": 6600000, "rew": 78.09999961853028, "rew_std": 18.799307603869114, "Agent": "dqn"}, {"env_step": 6700000, "rew": 73.22999992370606, "rew_std": 15.534029981853278, "Agent": "dqn"}, {"env_step": 6800000, "rew": 
98.13000106811523, "rew_std": 20.634877163905355, "Agent": "dqn"}, {"env_step": 6900000, "rew": 89.0, "rew_std": 24.171967172300295, "Agent": "dqn"}, {"env_step": 7000000, "rew": 94.03000106811524, "rew_std": 22.807151580126355, "Agent": "dqn"}, {"env_step": 7100000, "rew": 91.46000061035156, "rew_std": 23.31502638921496, "Agent": "dqn"}, {"env_step": 7200000, "rew": 83.65999984741211, "rew_std": 16.474659802642254, "Agent": "dqn"}, {"env_step": 7300000, "rew": 80.12000045776367, "rew_std": 11.930952073355787, "Agent": "dqn"}, {"env_step": 7400000, "rew": 77.42999877929688, "rew_std": 19.180929987122642, "Agent": "dqn"}, {"env_step": 7500000, "rew": 86.84999923706054, "rew_std": 17.592342521566607, "Agent": "dqn"}, {"env_step": 7600000, "rew": 82.52999992370606, "rew_std": 20.004302258249783, "Agent": "dqn"}, {"env_step": 7700000, "rew": 107.37000045776367, "rew_std": 34.55555067760397, "Agent": "dqn"}, {"env_step": 7800000, "rew": 96.06000061035157, "rew_std": 22.622474586994528, "Agent": "dqn"}, {"env_step": 7900000, "rew": 94.13999977111817, "rew_std": 28.41982322868775, "Agent": "dqn"}, {"env_step": 8000000, "rew": 101.21000061035156, "rew_std": 26.587306822196613, "Agent": "dqn"}, {"env_step": 8100000, "rew": 85.82999992370605, "rew_std": 25.36777673896879, "Agent": "dqn"}, {"env_step": 8200000, "rew": 87.62999954223633, "rew_std": 39.89198041691922, "Agent": "dqn"}, {"env_step": 8300000, "rew": 101.57999839782715, "rew_std": 41.77749993495092, "Agent": "dqn"}, {"env_step": 8400000, "rew": 93.03000068664551, "rew_std": 27.115569981755712, "Agent": "dqn"}, {"env_step": 8500000, "rew": 87.7099998474121, "rew_std": 36.714669130069105, "Agent": "dqn"}, {"env_step": 8600000, "rew": 90.95000076293945, "rew_std": 12.810327317331756, "Agent": "dqn"}, {"env_step": 8700000, "rew": 104.0099998474121, "rew_std": 32.22671128294261, "Agent": "dqn"}, {"env_step": 8800000, "rew": 103.99000091552735, "rew_std": 27.8962892258018, "Agent": "dqn"}, {"env_step": 8900000, "rew": 
114.25000076293945, "rew_std": 32.67464640396138, "Agent": "dqn"}, {"env_step": 9000000, "rew": 106.80000038146973, "rew_std": 32.973262202331064, "Agent": "dqn"}, {"env_step": 9100000, "rew": 98.43000030517578, "rew_std": 20.662141740244365, "Agent": "dqn"}, {"env_step": 9200000, "rew": 88.96000099182129, "rew_std": 33.323092362700145, "Agent": "dqn"}, {"env_step": 9300000, "rew": 121.35000076293946, "rew_std": 25.42248087718444, "Agent": "dqn"}, {"env_step": 9400000, "rew": 114.03000106811524, "rew_std": 31.284407594353972, "Agent": "dqn"}, {"env_step": 9500000, "rew": 125.76000061035157, "rew_std": 36.260922820967124, "Agent": "dqn"}, {"env_step": 9600000, "rew": 100.2, "rew_std": 41.19708629230755, "Agent": "dqn"}, {"env_step": 9700000, "rew": 122.76999969482422, "rew_std": 33.774755897887985, "Agent": "dqn"}, {"env_step": 9800000, "rew": 133.53999938964844, "rew_std": 44.59944213785166, "Agent": "dqn"}, {"env_step": 9900000, "rew": 111.62000198364258, "rew_std": 32.61686090929321, "Agent": "dqn"}, {"env_step": 10000000, "rew": 119.93999862670898, "rew_std": 32.37928701114132, "Agent": "dqn"}, {"env_step": 0, "rew": 1.8899999886751175, "rew_std": 1.2739309645395656, "Agent": "fqf"}, {"env_step": 100000, "rew": 6.579999995231629, "rew_std": 2.0439177558947796, "Agent": "fqf"}, {"env_step": 200000, "rew": 15.449999904632568, "rew_std": 1.902235310873203, "Agent": "fqf"}, {"env_step": 300000, "rew": 15.630000019073487, "rew_std": 1.6285271252770628, "Agent": "fqf"}, {"env_step": 400000, "rew": 20.410000228881835, "rew_std": 2.727068191039453, "Agent": "fqf"}, {"env_step": 500000, "rew": 27.5, "rew_std": 5.404812646912962, "Agent": "fqf"}, {"env_step": 600000, "rew": 34.10999984741211, "rew_std": 5.110469271319042, "Agent": "fqf"}, {"env_step": 700000, "rew": 44.479999732971194, "rew_std": 9.641866944004914, "Agent": "fqf"}, {"env_step": 800000, "rew": 58.929999923706056, "rew_std": 10.773119278904277, "Agent": "fqf"}, {"env_step": 900000, "rew": 74.28000030517578, 
"rew_std": 18.03883545662945, "Agent": "fqf"}, {"env_step": 1000000, "rew": 91.27000007629394, "rew_std": 19.16142200039165, "Agent": "fqf"}, {"env_step": 1100000, "rew": 89.1100009918213, "rew_std": 13.826457342888988, "Agent": "fqf"}, {"env_step": 1200000, "rew": 94.35999984741211, "rew_std": 42.61833485770056, "Agent": "fqf"}, {"env_step": 1300000, "rew": 111.60999984741211, "rew_std": 28.842728288197964, "Agent": "fqf"}, {"env_step": 1400000, "rew": 137.2, "rew_std": 25.806123286033692, "Agent": "fqf"}, {"env_step": 1500000, "rew": 157.3999984741211, "rew_std": 31.331581823972737, "Agent": "fqf"}, {"env_step": 1600000, "rew": 166.1099998474121, "rew_std": 31.462182753954036, "Agent": "fqf"}, {"env_step": 1700000, "rew": 189.7300018310547, "rew_std": 38.91149203014107, "Agent": "fqf"}, {"env_step": 1800000, "rew": 206.3300003051758, "rew_std": 38.15114780212248, "Agent": "fqf"}, {"env_step": 1900000, "rew": 224.5, "rew_std": 38.52728870047049, "Agent": "fqf"}, {"env_step": 2000000, "rew": 222.76000061035157, "rew_std": 46.865021102153406, "Agent": "fqf"}, {"env_step": 2100000, "rew": 245.28999938964844, "rew_std": 49.371034931773835, "Agent": "fqf"}, {"env_step": 2200000, "rew": 267.70999908447266, "rew_std": 28.133131370013544, "Agent": "fqf"}, {"env_step": 2300000, "rew": 268.58999786376955, "rew_std": 49.59553152130905, "Agent": "fqf"}, {"env_step": 2400000, "rew": 247.30999908447265, "rew_std": 63.79005209586264, "Agent": "fqf"}, {"env_step": 2500000, "rew": 281.91000213623045, "rew_std": 36.61752006054675, "Agent": "fqf"}, {"env_step": 2600000, "rew": 273.11999969482423, "rew_std": 53.78060736276747, "Agent": "fqf"}, {"env_step": 2700000, "rew": 299.9100006103516, "rew_std": 30.088116602832, "Agent": "fqf"}, {"env_step": 2800000, "rew": 297.63999938964844, "rew_std": 55.8203918208197, "Agent": "fqf"}, {"env_step": 2900000, "rew": 309.13000335693357, "rew_std": 49.09570374572412, "Agent": "fqf"}, {"env_step": 3000000, "rew": 315.32999725341796, "rew_std": 
43.39456033965058, "Agent": "fqf"}, {"env_step": 3100000, "rew": 279.22000274658205, "rew_std": 85.23476924161254, "Agent": "fqf"}, {"env_step": 3200000, "rew": 297.58000030517576, "rew_std": 63.087017417518, "Agent": "fqf"}, {"env_step": 3300000, "rew": 301.73999938964846, "rew_std": 49.87208136848178, "Agent": "fqf"}, {"env_step": 3400000, "rew": 275.0300010681152, "rew_std": 68.53272323238663, "Agent": "fqf"}, {"env_step": 3500000, "rew": 309.43999938964845, "rew_std": 32.276252757870395, "Agent": "fqf"}, {"env_step": 3600000, "rew": 319.67999572753905, "rew_std": 42.78061813129124, "Agent": "fqf"}, {"env_step": 3700000, "rew": 332.6600006103516, "rew_std": 33.96071915762684, "Agent": "fqf"}, {"env_step": 3800000, "rew": 355.36000061035156, "rew_std": 28.662765200015865, "Agent": "fqf"}, {"env_step": 3900000, "rew": 316.99999542236327, "rew_std": 53.55531395626551, "Agent": "fqf"}, {"env_step": 4000000, "rew": 319.78999786376954, "rew_std": 80.64893371138278, "Agent": "fqf"}, {"env_step": 4100000, "rew": 345.6599975585938, "rew_std": 25.589495375720983, "Agent": "fqf"}, {"env_step": 4200000, "rew": 338.1300048828125, "rew_std": 25.35192511248875, "Agent": "fqf"}, {"env_step": 4300000, "rew": 331.38999633789064, "rew_std": 38.176389861042445, "Agent": "fqf"}, {"env_step": 4400000, "rew": 328.5199996948242, "rew_std": 88.22566393672108, "Agent": "fqf"}, {"env_step": 4500000, "rew": 356.0900024414062, "rew_std": 26.407973243051238, "Agent": "fqf"}, {"env_step": 4600000, "rew": 330.85999755859376, "rew_std": 52.67645135396039, "Agent": "fqf"}, {"env_step": 4700000, "rew": 363.3799987792969, "rew_std": 35.14901906852587, "Agent": "fqf"}, {"env_step": 4800000, "rew": 364.2799987792969, "rew_std": 22.464898788730974, "Agent": "fqf"}, {"env_step": 4900000, "rew": 342.62999572753904, "rew_std": 31.664115177564884, "Agent": "fqf"}, {"env_step": 5000000, "rew": 326.9600006103516, "rew_std": 44.123470427909474, "Agent": "fqf"}, {"env_step": 5100000, "rew": 
342.51000061035154, "rew_std": 42.72719409460211, "Agent": "fqf"}, {"env_step": 5200000, "rew": 368.5100036621094, "rew_std": 36.07728802793028, "Agent": "fqf"}, {"env_step": 5300000, "rew": 339.4, "rew_std": 37.085738197603874, "Agent": "fqf"}, {"env_step": 5400000, "rew": 339.8399963378906, "rew_std": 29.956508084411002, "Agent": "fqf"}, {"env_step": 5500000, "rew": 329.1000015258789, "rew_std": 52.50864780548441, "Agent": "fqf"}, {"env_step": 5600000, "rew": 345.49000244140626, "rew_std": 25.85855984648126, "Agent": "fqf"}, {"env_step": 5700000, "rew": 348.35999908447263, "rew_std": 53.21148820407242, "Agent": "fqf"}, {"env_step": 5800000, "rew": 344.5499984741211, "rew_std": 46.83556422879634, "Agent": "fqf"}, {"env_step": 5900000, "rew": 350.65999908447264, "rew_std": 54.664820479976136, "Agent": "fqf"}, {"env_step": 6000000, "rew": 346.4800033569336, "rew_std": 62.894860468597145, "Agent": "fqf"}, {"env_step": 6100000, "rew": 350.48999938964846, "rew_std": 26.15692816124583, "Agent": "fqf"}, {"env_step": 6200000, "rew": 362.8300048828125, "rew_std": 36.7557307025397, "Agent": "fqf"}, {"env_step": 6300000, "rew": 328.7800018310547, "rew_std": 47.65588972046255, "Agent": "fqf"}, {"env_step": 6400000, "rew": 364.5799987792969, "rew_std": 26.335103106220885, "Agent": "fqf"}, {"env_step": 6500000, "rew": 318.6900009155273, "rew_std": 56.799413057921996, "Agent": "fqf"}, {"env_step": 6600000, "rew": 371.51999816894534, "rew_std": 26.211210500163315, "Agent": "fqf"}, {"env_step": 6700000, "rew": 347.8500030517578, "rew_std": 32.235581327971936, "Agent": "fqf"}, {"env_step": 6800000, "rew": 372.7200042724609, "rew_std": 28.695146937057064, "Agent": "fqf"}, {"env_step": 6900000, "rew": 361.5699981689453, "rew_std": 43.78193476682266, "Agent": "fqf"}, {"env_step": 7000000, "rew": 317.13000030517577, "rew_std": 49.348357840104256, "Agent": "fqf"}, {"env_step": 7100000, "rew": 340.74000091552733, "rew_std": 77.68987471874023, "Agent": "fqf"}, {"env_step": 7200000, "rew": 
344.6399978637695, "rew_std": 59.30083036122553, "Agent": "fqf"}, {"env_step": 7300000, "rew": 341.8500015258789, "rew_std": 40.51899094909766, "Agent": "fqf"}, {"env_step": 7400000, "rew": 359.8500030517578, "rew_std": 20.46920835454293, "Agent": "fqf"}, {"env_step": 7500000, "rew": 342.1299987792969, "rew_std": 52.97516510033074, "Agent": "fqf"}, {"env_step": 7600000, "rew": 348.57000122070315, "rew_std": 27.585034799201082, "Agent": "fqf"}, {"env_step": 7700000, "rew": 347.2500030517578, "rew_std": 27.34297686518728, "Agent": "fqf"}, {"env_step": 7800000, "rew": 345.8099990844727, "rew_std": 71.28503787650662, "Agent": "fqf"}, {"env_step": 7900000, "rew": 355.4199981689453, "rew_std": 48.30798976082001, "Agent": "fqf"}, {"env_step": 8000000, "rew": 373.85, "rew_std": 25.45906797131143, "Agent": "fqf"}, {"env_step": 8100000, "rew": 346.7200012207031, "rew_std": 34.270798637584605, "Agent": "fqf"}, {"env_step": 8200000, "rew": 365.01000213623047, "rew_std": 59.017802669324645, "Agent": "fqf"}, {"env_step": 8300000, "rew": 334.4899932861328, "rew_std": 60.103334841706335, "Agent": "fqf"}, {"env_step": 8400000, "rew": 355.2200012207031, "rew_std": 30.91270427542967, "Agent": "fqf"}, {"env_step": 8500000, "rew": 359.00999755859374, "rew_std": 34.8320641579841, "Agent": "fqf"}, {"env_step": 8600000, "rew": 336.2000045776367, "rew_std": 45.92332986368791, "Agent": "fqf"}, {"env_step": 8700000, "rew": 371.00999755859374, "rew_std": 35.20410344429364, "Agent": "fqf"}, {"env_step": 8800000, "rew": 345.88999938964844, "rew_std": 27.24296297805744, "Agent": "fqf"}, {"env_step": 8900000, "rew": 327.00999603271487, "rew_std": 62.46383398874547, "Agent": "fqf"}, {"env_step": 9000000, "rew": 367.0299957275391, "rew_std": 24.968425234757614, "Agent": "fqf"}, {"env_step": 9100000, "rew": 357.7699951171875, "rew_std": 31.722642179225197, "Agent": "fqf"}, {"env_step": 9200000, "rew": 351.0800018310547, "rew_std": 40.11283828329421, "Agent": "fqf"}, {"env_step": 9300000, "rew": 
363.8500030517578, "rew_std": 21.079715798651307, "Agent": "fqf"}, {"env_step": 9400000, "rew": 342.8400054931641, "rew_std": 63.49006749721425, "Agent": "fqf"}, {"env_step": 9500000, "rew": 373.99000091552733, "rew_std": 45.288134172835484, "Agent": "fqf"}, {"env_step": 9600000, "rew": 356.8399993896484, "rew_std": 47.543644666957135, "Agent": "fqf"}, {"env_step": 9700000, "rew": 364.5300018310547, "rew_std": 23.580712727396694, "Agent": "fqf"}, {"env_step": 9800000, "rew": 382.63999633789064, "rew_std": 29.47945522804052, "Agent": "fqf"}, {"env_step": 9900000, "rew": 353.19000244140625, "rew_std": 41.2007373443112, "Agent": "fqf"}, {"env_step": 10000000, "rew": 335.73000183105466, "rew_std": 36.178617059142844, "Agent": "fqf"}, {"env_step": 0, "rew": 2.0200000025331972, "rew_std": 1.062826409813908, "Agent": "qrdqn"}, {"env_step": 100000, "rew": 11.1, "rew_std": 1.8363006019761408, "Agent": "qrdqn"}, {"env_step": 200000, "rew": 15.950000190734864, "rew_std": 2.2743131208480416, "Agent": "qrdqn"}, {"env_step": 300000, "rew": 21.6100004196167, "rew_std": 1.6908279939018895, "Agent": "qrdqn"}, {"env_step": 400000, "rew": 26.039999961853027, "rew_std": 3.166764513719337, "Agent": "qrdqn"}, {"env_step": 500000, "rew": 31.70999984741211, "rew_std": 4.500988535661596, "Agent": "qrdqn"}, {"env_step": 600000, "rew": 34.890000343322754, "rew_std": 8.884306606312624, "Agent": "qrdqn"}, {"env_step": 700000, "rew": 39.35000057220459, "rew_std": 11.433481420905531, "Agent": "qrdqn"}, {"env_step": 800000, "rew": 45.01999969482422, "rew_std": 7.258622758189306, "Agent": "qrdqn"}, {"env_step": 900000, "rew": 51.72000007629394, "rew_std": 12.04224138466468, "Agent": "qrdqn"}, {"env_step": 1000000, "rew": 48.96999988555908, "rew_std": 9.053513212898372, "Agent": "qrdqn"}, {"env_step": 1100000, "rew": 66.27000007629394, "rew_std": 17.327149763903382, "Agent": "qrdqn"}, {"env_step": 1200000, "rew": 67.5099998474121, "rew_std": 15.202332343099394, "Agent": "qrdqn"}, {"env_step": 
1300000, "rew": 47.89999961853027, "rew_std": 13.158266496317525, "Agent": "qrdqn"}, {"env_step": 1400000, "rew": 54.50000057220459, "rew_std": 12.41821272111614, "Agent": "qrdqn"}, {"env_step": 1500000, "rew": 71.68000030517578, "rew_std": 12.78567937628033, "Agent": "qrdqn"}, {"env_step": 1600000, "rew": 65.53000030517578, "rew_std": 15.931104640556512, "Agent": "qrdqn"}, {"env_step": 1700000, "rew": 65.3899990081787, "rew_std": 14.402253328610442, "Agent": "qrdqn"}, {"env_step": 1800000, "rew": 69.67999954223633, "rew_std": 13.667245828132831, "Agent": "qrdqn"}, {"env_step": 1900000, "rew": 104.74000091552735, "rew_std": 41.49964546688277, "Agent": "qrdqn"}, {"env_step": 2000000, "rew": 64.65, "rew_std": 15.356839508305148, "Agent": "qrdqn"}, {"env_step": 2100000, "rew": 67.09999961853028, "rew_std": 20.951228029225863, "Agent": "qrdqn"}, {"env_step": 2200000, "rew": 81.0700008392334, "rew_std": 28.43526185117888, "Agent": "qrdqn"}, {"env_step": 2300000, "rew": 69.75000076293945, "rew_std": 15.399562151245634, "Agent": "qrdqn"}, {"env_step": 2400000, "rew": 93.77000045776367, "rew_std": 48.462461876453574, "Agent": "qrdqn"}, {"env_step": 2500000, "rew": 90.25000076293945, "rew_std": 37.74602613741299, "Agent": "qrdqn"}, {"env_step": 2600000, "rew": 90.18000106811523, "rew_std": 29.855881757158723, "Agent": "qrdqn"}, {"env_step": 2700000, "rew": 99.36000061035156, "rew_std": 40.36459324093467, "Agent": "qrdqn"}, {"env_step": 2800000, "rew": 81.06999969482422, "rew_std": 39.89363303080445, "Agent": "qrdqn"}, {"env_step": 2900000, "rew": 95.2400001525879, "rew_std": 43.06056686630658, "Agent": "qrdqn"}, {"env_step": 3000000, "rew": 104.05000076293945, "rew_std": 28.336240349879567, "Agent": "qrdqn"}, {"env_step": 3100000, "rew": 93.90999908447266, "rew_std": 27.109681802005888, "Agent": "qrdqn"}, {"env_step": 3200000, "rew": 107.66000022888184, "rew_std": 40.67018922208847, "Agent": "qrdqn"}, {"env_step": 3300000, "rew": 93.79000053405761, "rew_std": 
30.64237221598378, "Agent": "qrdqn"}, {"env_step": 3400000, "rew": 108.4900001525879, "rew_std": 41.321239097052164, "Agent": "qrdqn"}, {"env_step": 3500000, "rew": 124.85, "rew_std": 42.0317792485268, "Agent": "qrdqn"}, {"env_step": 3600000, "rew": 116.13000106811523, "rew_std": 43.9727670450532, "Agent": "qrdqn"}, {"env_step": 3700000, "rew": 111.63999977111817, "rew_std": 43.274708584109035, "Agent": "qrdqn"}, {"env_step": 3800000, "rew": 106.15, "rew_std": 45.770651300224905, "Agent": "qrdqn"}, {"env_step": 3900000, "rew": 129.54000015258788, "rew_std": 46.98308583368676, "Agent": "qrdqn"}, {"env_step": 4000000, "rew": 115.51000137329102, "rew_std": 36.304806385435626, "Agent": "qrdqn"}, {"env_step": 4100000, "rew": 129.35999908447266, "rew_std": 31.217724397411512, "Agent": "qrdqn"}, {"env_step": 4200000, "rew": 135.28999938964844, "rew_std": 26.868697494551604, "Agent": "qrdqn"}, {"env_step": 4300000, "rew": 126.12999877929687, "rew_std": 41.017608258673214, "Agent": "qrdqn"}, {"env_step": 4400000, "rew": 136.70000076293945, "rew_std": 50.228299617735615, "Agent": "qrdqn"}, {"env_step": 4500000, "rew": 120.93000183105468, "rew_std": 41.47946862067106, "Agent": "qrdqn"}, {"env_step": 4600000, "rew": 138.7300003051758, "rew_std": 50.09087919140305, "Agent": "qrdqn"}, {"env_step": 4700000, "rew": 111.62000122070313, "rew_std": 28.04349582228607, "Agent": "qrdqn"}, {"env_step": 4800000, "rew": 139.99000091552733, "rew_std": 46.49149126588381, "Agent": "qrdqn"}, {"env_step": 4900000, "rew": 141.19000167846679, "rew_std": 55.482997939665566, "Agent": "qrdqn"}, {"env_step": 5000000, "rew": 141.16000061035157, "rew_std": 29.674102239383583, "Agent": "qrdqn"}, {"env_step": 5100000, "rew": 149.54000015258788, "rew_std": 40.483557547800565, "Agent": "qrdqn"}, {"env_step": 5200000, "rew": 138.81000061035155, "rew_std": 25.27221522764106, "Agent": "qrdqn"}, {"env_step": 5300000, "rew": 154.86000289916993, "rew_std": 47.20214481743621, "Agent": "qrdqn"}, {"env_step": 
5400000, "rew": 152.83000259399415, "rew_std": 35.65692378910702, "Agent": "qrdqn"}, {"env_step": 5500000, "rew": 165.04999923706055, "rew_std": 52.75731800579599, "Agent": "qrdqn"}, {"env_step": 5600000, "rew": 159.33999938964843, "rew_std": 39.30949528911902, "Agent": "qrdqn"}, {"env_step": 5700000, "rew": 157.33999938964843, "rew_std": 31.022483741005583, "Agent": "qrdqn"}, {"env_step": 5800000, "rew": 133.2099983215332, "rew_std": 30.196568926146423, "Agent": "qrdqn"}, {"env_step": 5900000, "rew": 154.22000122070312, "rew_std": 54.375359375832986, "Agent": "qrdqn"}, {"env_step": 6000000, "rew": 184.93000030517578, "rew_std": 30.234021002954563, "Agent": "qrdqn"}, {"env_step": 6100000, "rew": 163.85000305175782, "rew_std": 37.09256658071057, "Agent": "qrdqn"}, {"env_step": 6200000, "rew": 174.16000061035157, "rew_std": 45.05503855203985, "Agent": "qrdqn"}, {"env_step": 6300000, "rew": 134.93999862670898, "rew_std": 28.63627883914366, "Agent": "qrdqn"}, {"env_step": 6400000, "rew": 158.54000244140624, "rew_std": 58.80484980319214, "Agent": "qrdqn"}, {"env_step": 6500000, "rew": 163.03000259399414, "rew_std": 43.68508069770034, "Agent": "qrdqn"}, {"env_step": 6600000, "rew": 171.29000091552734, "rew_std": 25.585327282680602, "Agent": "qrdqn"}, {"env_step": 6700000, "rew": 175.55999755859375, "rew_std": 40.4673989589574, "Agent": "qrdqn"}, {"env_step": 6800000, "rew": 170.24999923706054, "rew_std": 40.89010527420232, "Agent": "qrdqn"}, {"env_step": 6900000, "rew": 174.58000106811522, "rew_std": 34.85977017255225, "Agent": "qrdqn"}, {"env_step": 7000000, "rew": 197.0900016784668, "rew_std": 40.22520027724406, "Agent": "qrdqn"}, {"env_step": 7100000, "rew": 185.90000076293944, "rew_std": 40.68073380527696, "Agent": "qrdqn"}, {"env_step": 7200000, "rew": 163.37999954223633, "rew_std": 29.362010642178937, "Agent": "qrdqn"}, {"env_step": 7300000, "rew": 175.2400001525879, "rew_std": 33.284537997701975, "Agent": "qrdqn"}, {"env_step": 7400000, "rew": 173.48999938964843, 
"rew_std": 38.9055120846502, "Agent": "qrdqn"}, {"env_step": 7500000, "rew": 180.91999893188478, "rew_std": 32.606096613717376, "Agent": "qrdqn"}, {"env_step": 7600000, "rew": 182.53000030517578, "rew_std": 39.06146447307148, "Agent": "qrdqn"}, {"env_step": 7700000, "rew": 202.53999938964844, "rew_std": 34.642085489481985, "Agent": "qrdqn"}, {"env_step": 7800000, "rew": 198.09000091552736, "rew_std": 37.14933188731783, "Agent": "qrdqn"}, {"env_step": 7900000, "rew": 173.42000045776368, "rew_std": 46.51472327478004, "Agent": "qrdqn"}, {"env_step": 8000000, "rew": 187.71000137329102, "rew_std": 42.45332867238581, "Agent": "qrdqn"}, {"env_step": 8100000, "rew": 178.81999816894532, "rew_std": 25.60713985608123, "Agent": "qrdqn"}, {"env_step": 8200000, "rew": 186.19999923706055, "rew_std": 58.60175754971055, "Agent": "qrdqn"}, {"env_step": 8300000, "rew": 183.31999893188475, "rew_std": 40.961731898305466, "Agent": "qrdqn"}, {"env_step": 8400000, "rew": 189.95, "rew_std": 19.00927386779873, "Agent": "qrdqn"}, {"env_step": 8500000, "rew": 192.83999938964843, "rew_std": 33.04204192587639, "Agent": "qrdqn"}, {"env_step": 8600000, "rew": 202.9000015258789, "rew_std": 23.599364986605206, "Agent": "qrdqn"}, {"env_step": 8700000, "rew": 186.10999908447266, "rew_std": 46.4904581707004, "Agent": "qrdqn"}, {"env_step": 8800000, "rew": 213.19000015258788, "rew_std": 44.943285014544436, "Agent": "qrdqn"}, {"env_step": 8900000, "rew": 202.45999603271486, "rew_std": 54.62318213575417, "Agent": "qrdqn"}, {"env_step": 9000000, "rew": 163.0799997329712, "rew_std": 66.0252791544666, "Agent": "qrdqn"}, {"env_step": 9100000, "rew": 189.61999969482423, "rew_std": 31.533055330348382, "Agent": "qrdqn"}, {"env_step": 9200000, "rew": 190.22999954223633, "rew_std": 44.21936226238718, "Agent": "qrdqn"}, {"env_step": 9300000, "rew": 206.0199996948242, "rew_std": 34.16480026287263, "Agent": "qrdqn"}, {"env_step": 9400000, "rew": 194.15, "rew_std": 31.170764052493578, "Agent": "qrdqn"}, {"env_step": 
9500000, "rew": 198.09000091552736, "rew_std": 34.949348176785655, "Agent": "qrdqn"}, {"env_step": 9600000, "rew": 184.69000167846679, "rew_std": 36.43675343515362, "Agent": "qrdqn"}, {"env_step": 9700000, "rew": 177.53000259399414, "rew_std": 49.71995694653478, "Agent": "qrdqn"}, {"env_step": 9800000, "rew": 188.76999893188477, "rew_std": 54.15346713496224, "Agent": "qrdqn"}, {"env_step": 9900000, "rew": 228.30999908447265, "rew_std": 27.33559443989839, "Agent": "qrdqn"}, {"env_step": 10000000, "rew": 208.64000244140624, "rew_std": 42.412903043388575, "Agent": "qrdqn"}, {"env_step": 0, "rew": 2.0299999952316283, "rew_std": 0.6356886257408865, "Agent": "iqn"}, {"env_step": 100000, "rew": 10.880000019073487, "rew_std": 1.0235234666900987, "Agent": "iqn"}, {"env_step": 200000, "rew": 16.200000095367432, "rew_std": 2.41453951471406, "Agent": "iqn"}, {"env_step": 300000, "rew": 19.000000286102296, "rew_std": 3.212164382918219, "Agent": "iqn"}, {"env_step": 400000, "rew": 24.710000038146973, "rew_std": 4.045108260414139, "Agent": "iqn"}, {"env_step": 500000, "rew": 36.970000457763675, "rew_std": 6.761220184785156, "Agent": "iqn"}, {"env_step": 600000, "rew": 57.630000305175784, "rew_std": 17.247496577908297, "Agent": "iqn"}, {"env_step": 700000, "rew": 59.85, "rew_std": 15.685677915496768, "Agent": "iqn"}, {"env_step": 800000, "rew": 79.85, "rew_std": 15.545884075479117, "Agent": "iqn"}, {"env_step": 900000, "rew": 85.95, "rew_std": 18.034815034946483, "Agent": "iqn"}, {"env_step": 1000000, "rew": 105.31999969482422, "rew_std": 23.10228556713175, "Agent": "iqn"}, {"env_step": 1100000, "rew": 114.6, "rew_std": 42.868006897695565, "Agent": "iqn"}, {"env_step": 1200000, "rew": 100.68000030517578, "rew_std": 21.099801867092722, "Agent": "iqn"}, {"env_step": 1300000, "rew": 115.17000045776368, "rew_std": 24.481870429507214, "Agent": "iqn"}, {"env_step": 1400000, "rew": 148.81999969482422, "rew_std": 46.95697798485056, "Agent": "iqn"}, {"env_step": 1500000, "rew": 
174.43000259399415, "rew_std": 65.91861982472467, "Agent": "iqn"}, {"env_step": 1600000, "rew": 177.42999954223632, "rew_std": 37.79439115810842, "Agent": "iqn"}, {"env_step": 1700000, "rew": 212.91999893188478, "rew_std": 38.73584462706583, "Agent": "iqn"}, {"env_step": 1800000, "rew": 239.47000274658203, "rew_std": 44.23055960487321, "Agent": "iqn"}, {"env_step": 1900000, "rew": 224.89999542236328, "rew_std": 35.47142441734377, "Agent": "iqn"}, {"env_step": 2000000, "rew": 255.11000366210936, "rew_std": 70.83536414074327, "Agent": "iqn"}, {"env_step": 2100000, "rew": 268.1800018310547, "rew_std": 46.30323741929793, "Agent": "iqn"}, {"env_step": 2200000, "rew": 251.55999755859375, "rew_std": 84.66489362401488, "Agent": "iqn"}, {"env_step": 2300000, "rew": 272.47999420166013, "rew_std": 42.157886622987164, "Agent": "iqn"}, {"env_step": 2400000, "rew": 315.0400024414063, "rew_std": 35.556779061887944, "Agent": "iqn"}, {"env_step": 2500000, "rew": 288.16000213623045, "rew_std": 75.40411670056966, "Agent": "iqn"}, {"env_step": 2600000, "rew": 282.24999847412107, "rew_std": 67.52896065218202, "Agent": "iqn"}, {"env_step": 2700000, "rew": 314.6600006103516, "rew_std": 33.30751400267159, "Agent": "iqn"}, {"env_step": 2800000, "rew": 292.0699981689453, "rew_std": 70.06501237480857, "Agent": "iqn"}, {"env_step": 2900000, "rew": 280.75, "rew_std": 53.51295587493006, "Agent": "iqn"}, {"env_step": 3000000, "rew": 320.45, "rew_std": 25.30736054112851, "Agent": "iqn"}, {"env_step": 3100000, "rew": 314.0599990844727, "rew_std": 44.75654636439621, "Agent": "iqn"}, {"env_step": 3200000, "rew": 331.3699981689453, "rew_std": 23.177711488651255, "Agent": "iqn"}, {"env_step": 3300000, "rew": 330.7899993896484, "rew_std": 30.686655193589775, "Agent": "iqn"}, {"env_step": 3400000, "rew": 302.63000030517577, "rew_std": 62.505184496442915, "Agent": "iqn"}, {"env_step": 3500000, "rew": 312.2699996948242, "rew_std": 60.86172947653031, "Agent": "iqn"}, {"env_step": 3600000, "rew": 
337.6700012207031, "rew_std": 35.29467556366819, "Agent": "iqn"}, {"env_step": 3700000, "rew": 299.9600006103516, "rew_std": 71.5931756482553, "Agent": "iqn"}, {"env_step": 3800000, "rew": 246.19000167846679, "rew_std": 87.48656258153838, "Agent": "iqn"}, {"env_step": 3900000, "rew": 326.85999755859376, "rew_std": 51.01209852522548, "Agent": "iqn"}, {"env_step": 4000000, "rew": 307.7100006103516, "rew_std": 58.77869107819418, "Agent": "iqn"}, {"env_step": 4100000, "rew": 311.3300033569336, "rew_std": 66.51625456766375, "Agent": "iqn"}, {"env_step": 4200000, "rew": 300.1900009155273, "rew_std": 35.21126152649786, "Agent": "iqn"}, {"env_step": 4300000, "rew": 307.98000183105466, "rew_std": 31.851334098626857, "Agent": "iqn"}, {"env_step": 4400000, "rew": 318.60999755859376, "rew_std": 43.45476757896472, "Agent": "iqn"}, {"env_step": 4500000, "rew": 327.53999786376954, "rew_std": 62.97847529940396, "Agent": "iqn"}, {"env_step": 4600000, "rew": 282.2199996948242, "rew_std": 65.37763576262309, "Agent": "iqn"}, {"env_step": 4700000, "rew": 298.72000274658205, "rew_std": 53.23556836698092, "Agent": "iqn"}, {"env_step": 4800000, "rew": 338.88000030517577, "rew_std": 64.96912888006219, "Agent": "iqn"}, {"env_step": 4900000, "rew": 320.7, "rew_std": 66.17025120314409, "Agent": "iqn"}, {"env_step": 5000000, "rew": 318.7500061035156, "rew_std": 40.19410702152574, "Agent": "iqn"}, {"env_step": 5100000, "rew": 295.86999664306643, "rew_std": 70.69885222375017, "Agent": "iqn"}, {"env_step": 5200000, "rew": 344.55999755859375, "rew_std": 31.723278369793565, "Agent": "iqn"}, {"env_step": 5300000, "rew": 328.7099975585937, "rew_std": 42.69288989467297, "Agent": "iqn"}, {"env_step": 5400000, "rew": 296.60999755859376, "rew_std": 66.42924294236374, "Agent": "iqn"}, {"env_step": 5500000, "rew": 283.00999755859374, "rew_std": 70.532708487894, "Agent": "iqn"}, {"env_step": 5600000, "rew": 355.9200042724609, "rew_std": 22.674958525127415, "Agent": "iqn"}, {"env_step": 5700000, "rew": 
301.33000030517576, "rew_std": 88.94783620229133, "Agent": "iqn"}, {"env_step": 5800000, "rew": 293.93999633789065, "rew_std": 52.81626973902586, "Agent": "iqn"}, {"env_step": 5900000, "rew": 353.3300018310547, "rew_std": 36.446288269528395, "Agent": "iqn"}, {"env_step": 6000000, "rew": 305.2799987792969, "rew_std": 54.018343190981945, "Agent": "iqn"}, {"env_step": 6100000, "rew": 349.9500030517578, "rew_std": 24.089635436458245, "Agent": "iqn"}, {"env_step": 6200000, "rew": 328.2199966430664, "rew_std": 48.67830877960062, "Agent": "iqn"}, {"env_step": 6300000, "rew": 343.49000244140626, "rew_std": 31.285020475109025, "Agent": "iqn"}, {"env_step": 6400000, "rew": 303.22000274658205, "rew_std": 55.57735070139985, "Agent": "iqn"}, {"env_step": 6500000, "rew": 321.5300003051758, "rew_std": 54.123212058813415, "Agent": "iqn"}, {"env_step": 6600000, "rew": 348.51000061035154, "rew_std": 38.049976345782774, "Agent": "iqn"}, {"env_step": 6700000, "rew": 352.5299987792969, "rew_std": 29.52311476032704, "Agent": "iqn"}, {"env_step": 6800000, "rew": 352.8199981689453, "rew_std": 27.302040489543003, "Agent": "iqn"}, {"env_step": 6900000, "rew": 325.95, "rew_std": 32.27061521262411, "Agent": "iqn"}, {"env_step": 7000000, "rew": 337.38000335693357, "rew_std": 57.94110508353129, "Agent": "iqn"}, {"env_step": 7100000, "rew": 337.1800064086914, "rew_std": 57.54656752151562, "Agent": "iqn"}, {"env_step": 7200000, "rew": 347.05, "rew_std": 29.28273921056375, "Agent": "iqn"}, {"env_step": 7300000, "rew": 336.5800018310547, "rew_std": 25.209833569339764, "Agent": "iqn"}, {"env_step": 7400000, "rew": 314.8700004577637, "rew_std": 83.15007163125841, "Agent": "iqn"}, {"env_step": 7500000, "rew": 346.7200012207031, "rew_std": 32.34303330425867, "Agent": "iqn"}, {"env_step": 7600000, "rew": 314.9699996948242, "rew_std": 51.35753429441995, "Agent": "iqn"}, {"env_step": 7700000, "rew": 312.4199966430664, "rew_std": 58.537693420573355, "Agent": "iqn"}, {"env_step": 7800000, "rew": 328.15, 
"rew_std": 45.2648705332799, "Agent": "iqn"}, {"env_step": 7900000, "rew": 346.00999755859374, "rew_std": 24.54332676008246, "Agent": "iqn"}, {"env_step": 8000000, "rew": 321.4400039672852, "rew_std": 50.142358701938754, "Agent": "iqn"}, {"env_step": 8100000, "rew": 337.76999816894534, "rew_std": 42.3190257331327, "Agent": "iqn"}, {"env_step": 8200000, "rew": 333.6700012207031, "rew_std": 29.37312182285308, "Agent": "iqn"}, {"env_step": 8300000, "rew": 324.7100006103516, "rew_std": 52.949946115785885, "Agent": "iqn"}, {"env_step": 8400000, "rew": 333.55999755859375, "rew_std": 33.45080520039474, "Agent": "iqn"}, {"env_step": 8500000, "rew": 331.4600036621094, "rew_std": 48.38483477275051, "Agent": "iqn"}, {"env_step": 8600000, "rew": 324.1599975585938, "rew_std": 49.630536861003044, "Agent": "iqn"}, {"env_step": 8700000, "rew": 351.7799987792969, "rew_std": 27.03962312243994, "Agent": "iqn"}, {"env_step": 8800000, "rew": 339.1300018310547, "rew_std": 53.111113054337935, "Agent": "iqn"}, {"env_step": 8900000, "rew": 329.3400024414062, "rew_std": 39.404931065916315, "Agent": "iqn"}, {"env_step": 9000000, "rew": 337.5, "rew_std": 31.20205217899507, "Agent": "iqn"}, {"env_step": 9100000, "rew": 312.82000122070315, "rew_std": 81.47756719359116, "Agent": "iqn"}, {"env_step": 9200000, "rew": 318.81000366210935, "rew_std": 61.65786917648312, "Agent": "iqn"}, {"env_step": 9300000, "rew": 355.5400024414063, "rew_std": 16.46543217809965, "Agent": "iqn"}, {"env_step": 9400000, "rew": 316.15000305175784, "rew_std": 69.99261936144228, "Agent": "iqn"}, {"env_step": 9500000, "rew": 346.1699981689453, "rew_std": 31.56960876454528, "Agent": "iqn"}, {"env_step": 9600000, "rew": 338.1199951171875, "rew_std": 45.98025387262979, "Agent": "iqn"}, {"env_step": 9700000, "rew": 329.63000335693357, "rew_std": 42.38771307072964, "Agent": "iqn"}, {"env_step": 9800000, "rew": 331.4, "rew_std": 39.38928853624626, "Agent": "iqn"}, {"env_step": 9900000, "rew": 318.9999969482422, "rew_std": 
43.779264475855875, "Agent": "iqn"}, {"env_step": 10000000, "rew": 345.3199981689453, "rew_std": 30.032006070982476, "Agent": "iqn"}, {"env_step": 0, "rew": 1.7999999798834323, "rew_std": 1.461506037350442, "Agent": "rainbow"}, {"env_step": 100000, "rew": 2.789999971538782, "rew_std": 2.4865437409115354, "Agent": "rainbow"}, {"env_step": 200000, "rew": 12.420000076293945, "rew_std": 1.9046260138234519, "Agent": "rainbow"}, {"env_step": 300000, "rew": 17.679999923706056, "rew_std": 3.491647063886927, "Agent": "rainbow"}, {"env_step": 400000, "rew": 22.370000076293945, "rew_std": 3.6133226874735795, "Agent": "rainbow"}, {"env_step": 500000, "rew": 25.05, "rew_std": 3.6381997573897396, "Agent": "rainbow"}, {"env_step": 600000, "rew": 35.28999977111816, "rew_std": 4.300104297171605, "Agent": "rainbow"}, {"env_step": 700000, "rew": 38.70999984741211, "rew_std": 5.35918838448342, "Agent": "rainbow"}, {"env_step": 800000, "rew": 50.54999961853027, "rew_std": 8.483189257414512, "Agent": "rainbow"}, {"env_step": 900000, "rew": 60.01000061035156, "rew_std": 10.841259439227493, "Agent": "rainbow"}, {"env_step": 1000000, "rew": 72.13000030517578, "rew_std": 14.579921126080473, "Agent": "rainbow"}, {"env_step": 1100000, "rew": 105.4900016784668, "rew_std": 41.347393284288735, "Agent": "rainbow"}, {"env_step": 1200000, "rew": 99.45000076293945, "rew_std": 25.770341675180582, "Agent": "rainbow"}, {"env_step": 1300000, "rew": 104.1, "rew_std": 24.05194318506914, "Agent": "rainbow"}, {"env_step": 1400000, "rew": 122.69000091552735, "rew_std": 33.97927298843038, "Agent": "rainbow"}, {"env_step": 1500000, "rew": 188.78999786376954, "rew_std": 47.31898888179236, "Agent": "rainbow"}, {"env_step": 1600000, "rew": 230.06000213623048, "rew_std": 45.668113003632385, "Agent": "rainbow"}, {"env_step": 1700000, "rew": 222.32000122070312, "rew_std": 69.96254538894237, "Agent": "rainbow"}, {"env_step": 1800000, "rew": 264.9599975585937, "rew_std": 44.13764946857046, "Agent": "rainbow"}, 
{"env_step": 1900000, "rew": 304.4, "rew_std": 44.836877219962034, "Agent": "rainbow"}, {"env_step": 2000000, "rew": 327.15000305175784, "rew_std": 42.61066349224782, "Agent": "rainbow"}, {"env_step": 2100000, "rew": 329.89000091552737, "rew_std": 47.86103795081456, "Agent": "rainbow"}, {"env_step": 2200000, "rew": 332.67000274658204, "rew_std": 49.45685362258107, "Agent": "rainbow"}, {"env_step": 2300000, "rew": 291.48999786376953, "rew_std": 72.64021500933792, "Agent": "rainbow"}, {"env_step": 2400000, "rew": 334.86000061035156, "rew_std": 78.4941559533148, "Agent": "rainbow"}, {"env_step": 2500000, "rew": 333.99000091552733, "rew_std": 75.35539320141508, "Agent": "rainbow"}, {"env_step": 2600000, "rew": 353.07000122070315, "rew_std": 51.661630785667796, "Agent": "rainbow"}, {"env_step": 2700000, "rew": 335.8000061035156, "rew_std": 65.27762192648794, "Agent": "rainbow"}, {"env_step": 2800000, "rew": 381.63999938964844, "rew_std": 33.01331023409123, "Agent": "rainbow"}, {"env_step": 2900000, "rew": 370.3, "rew_std": 29.175224311445085, "Agent": "rainbow"}, {"env_step": 3000000, "rew": 396.6999969482422, "rew_std": 40.15956167187139, "Agent": "rainbow"}, {"env_step": 3100000, "rew": 378.6699981689453, "rew_std": 44.56986018654734, "Agent": "rainbow"}, {"env_step": 3200000, "rew": 367.72999877929686, "rew_std": 48.201724085448, "Agent": "rainbow"}, {"env_step": 3300000, "rew": 373.5100036621094, "rew_std": 50.91597587464675, "Agent": "rainbow"}, {"env_step": 3400000, "rew": 373.94000549316405, "rew_std": 45.228182826120296, "Agent": "rainbow"}, {"env_step": 3500000, "rew": 386.4099945068359, "rew_std": 24.17674431981728, "Agent": "rainbow"}, {"env_step": 3600000, "rew": 405.77000732421874, "rew_std": 42.80668625101679, "Agent": "rainbow"}, {"env_step": 3700000, "rew": 407.6, "rew_std": 48.41737864076781, "Agent": "rainbow"}, {"env_step": 3800000, "rew": 408.2899993896484, "rew_std": 31.628448022764232, "Agent": "rainbow"}, {"env_step": 3900000, "rew": 
390.32000122070315, "rew_std": 28.6771582193927, "Agent": "rainbow"}, {"env_step": 4000000, "rew": 371.5400054931641, "rew_std": 32.82566550832653, "Agent": "rainbow"}, {"env_step": 4100000, "rew": 374.39000244140624, "rew_std": 44.559746097958346, "Agent": "rainbow"}, {"env_step": 4200000, "rew": 373.72999877929686, "rew_std": 42.524114470487525, "Agent": "rainbow"}, {"env_step": 4300000, "rew": 405.72999877929686, "rew_std": 21.966842238579567, "Agent": "rainbow"}, {"env_step": 4400000, "rew": 382.3500061035156, "rew_std": 30.38684579249488, "Agent": "rainbow"}, {"env_step": 4500000, "rew": 411.3499984741211, "rew_std": 69.50523878087984, "Agent": "rainbow"}, {"env_step": 4600000, "rew": 417.1599975585938, "rew_std": 63.06370162906102, "Agent": "rainbow"}, {"env_step": 4700000, "rew": 386.0800048828125, "rew_std": 28.676079172356957, "Agent": "rainbow"}, {"env_step": 4800000, "rew": 398.8399993896484, "rew_std": 30.87983841532746, "Agent": "rainbow"}, {"env_step": 4900000, "rew": 396.72999877929686, "rew_std": 33.71180556312966, "Agent": "rainbow"}, {"env_step": 5000000, "rew": 384.89000244140624, "rew_std": 33.94638292724067, "Agent": "rainbow"}, {"env_step": 5100000, "rew": 400.55, "rew_std": 46.40784802725254, "Agent": "rainbow"}, {"env_step": 5200000, "rew": 419.52000122070314, "rew_std": 28.008704651064, "Agent": "rainbow"}, {"env_step": 5300000, "rew": 407.02000122070314, "rew_std": 16.72254065818173, "Agent": "rainbow"}, {"env_step": 5400000, "rew": 414.0499969482422, "rew_std": 41.30225599948548, "Agent": "rainbow"}, {"env_step": 5500000, "rew": 398.7799987792969, "rew_std": 47.40503931200679, "Agent": "rainbow"}, {"env_step": 5600000, "rew": 403.820002746582, "rew_std": 64.9688057624937, "Agent": "rainbow"}, {"env_step": 5700000, "rew": 389.6999984741211, "rew_std": 72.28918253740537, "Agent": "rainbow"}, {"env_step": 5800000, "rew": 403.6700012207031, "rew_std": 38.4149988299815, "Agent": "rainbow"}, {"env_step": 5900000, "rew": 409.89000244140624, 
"rew_std": 24.034117466481515, "Agent": "rainbow"}, {"env_step": 6000000, "rew": 399.0400024414063, "rew_std": 39.877192287649464, "Agent": "rainbow"}, {"env_step": 6100000, "rew": 384.40000305175784, "rew_std": 56.328160747865354, "Agent": "rainbow"}, {"env_step": 6200000, "rew": 403.8999969482422, "rew_std": 33.781714504496996, "Agent": "rainbow"}, {"env_step": 6300000, "rew": 397.63999938964844, "rew_std": 26.051031347673337, "Agent": "rainbow"}, {"env_step": 6400000, "rew": 409.3799987792969, "rew_std": 15.132536949771454, "Agent": "rainbow"}, {"env_step": 6500000, "rew": 408.31000061035155, "rew_std": 22.57203611858315, "Agent": "rainbow"}, {"env_step": 6600000, "rew": 415.3700012207031, "rew_std": 38.56120137444495, "Agent": "rainbow"}, {"env_step": 6700000, "rew": 431.77000122070314, "rew_std": 47.137184174849416, "Agent": "rainbow"}, {"env_step": 6800000, "rew": 404.22999877929686, "rew_std": 34.66502740832278, "Agent": "rainbow"}, {"env_step": 6900000, "rew": 401.00999755859374, "rew_std": 22.66051127275429, "Agent": "rainbow"}, {"env_step": 7000000, "rew": 393.8199981689453, "rew_std": 48.31937448033659, "Agent": "rainbow"}, {"env_step": 7100000, "rew": 417.7, "rew_std": 50.47389404778245, "Agent": "rainbow"}, {"env_step": 7200000, "rew": 395.7499969482422, "rew_std": 40.349476442782, "Agent": "rainbow"}, {"env_step": 7300000, "rew": 417.0899963378906, "rew_std": 18.112780812448758, "Agent": "rainbow"}, {"env_step": 7400000, "rew": 383.18999633789065, "rew_std": 47.911194905935396, "Agent": "rainbow"}, {"env_step": 7500000, "rew": 410.9200073242188, "rew_std": 70.87141742592017, "Agent": "rainbow"}, {"env_step": 7600000, "rew": 420.8400024414062, "rew_std": 34.12468989742961, "Agent": "rainbow"}, {"env_step": 7700000, "rew": 422.22999572753906, "rew_std": 21.98477505605195, "Agent": "rainbow"}, {"env_step": 7800000, "rew": 423.4200042724609, "rew_std": 43.2258487041201, "Agent": "rainbow"}, {"env_step": 7900000, "rew": 418.1700012207031, "rew_std": 
35.33822585932713, "Agent": "rainbow"}, {"env_step": 8000000, "rew": 396.6499938964844, "rew_std": 39.98475122704287, "Agent": "rainbow"}, {"env_step": 8100000, "rew": 431.2100006103516, "rew_std": 43.29839137758222, "Agent": "rainbow"}, {"env_step": 8200000, "rew": 388.6700042724609, "rew_std": 42.25809445722602, "Agent": "rainbow"}, {"env_step": 8300000, "rew": 371.5899963378906, "rew_std": 76.53331656114038, "Agent": "rainbow"}, {"env_step": 8400000, "rew": 408.69000244140625, "rew_std": 24.624358338524335, "Agent": "rainbow"}, {"env_step": 8500000, "rew": 409.0399993896484, "rew_std": 41.573793217544875, "Agent": "rainbow"}, {"env_step": 8600000, "rew": 401.95, "rew_std": 37.11682649365457, "Agent": "rainbow"}, {"env_step": 8700000, "rew": 420.77000122070314, "rew_std": 44.74215708867204, "Agent": "rainbow"}, {"env_step": 8800000, "rew": 405.2499969482422, "rew_std": 31.691043996566787, "Agent": "rainbow"}, {"env_step": 8900000, "rew": 398.4700012207031, "rew_std": 40.46342211972141, "Agent": "rainbow"}, {"env_step": 9000000, "rew": 440.4, "rew_std": 50.116695811843044, "Agent": "rainbow"}, {"env_step": 9100000, "rew": 422.5, "rew_std": 35.31455535189779, "Agent": "rainbow"}, {"env_step": 9200000, "rew": 428.8199981689453, "rew_std": 33.58162360678032, "Agent": "rainbow"}, {"env_step": 9300000, "rew": 425.3400024414062, "rew_std": 46.15141555774316, "Agent": "rainbow"}, {"env_step": 9400000, "rew": 440.2799987792969, "rew_std": 42.420672496482084, "Agent": "rainbow"}, {"env_step": 9500000, "rew": 410.85, "rew_std": 6.367617048401184, "Agent": "rainbow"}, {"env_step": 9600000, "rew": 399.57000122070315, "rew_std": 53.69724380529421, "Agent": "rainbow"}, {"env_step": 9700000, "rew": 431.38999938964844, "rew_std": 40.52948488877201, "Agent": "rainbow"}, {"env_step": 9800000, "rew": 422.23999938964846, "rew_std": 37.25512766393269, "Agent": "rainbow"}, {"env_step": 9900000, "rew": 413.8299987792969, "rew_std": 36.612840411302244, "Agent": "rainbow"}, {"env_step": 
10000000, "rew": 392.0399993896484, "rew_std": 48.50192720302268, "Agent": "rainbow"}, {"env_step": 0, "rew": 2.0299999833106996, "rew_std": 0.8764131095183902, "Agent": "ppo"}, {"env_step": 100000, "rew": 2.680000030994415, "rew_std": 1.5315352171604617, "Agent": "ppo"}, {"env_step": 200000, "rew": 6.869999933242798, "rew_std": 2.2427883234590684, "Agent": "ppo"}, {"env_step": 300000, "rew": 8.350000023841858, "rew_std": 2.62154539898698, "Agent": "ppo"}, {"env_step": 400000, "rew": 10.77000002861023, "rew_std": 3.155328811734828, "Agent": "ppo"}, {"env_step": 500000, "rew": 11.309999942779541, "rew_std": 2.5762180106126547, "Agent": "ppo"}, {"env_step": 600000, "rew": 13.859999895095825, "rew_std": 4.651494279703967, "Agent": "ppo"}, {"env_step": 700000, "rew": 14.749999761581421, "rew_std": 4.220485482312834, "Agent": "ppo"}, {"env_step": 800000, "rew": 15.349999904632568, "rew_std": 4.1898090655449005, "Agent": "ppo"}, {"env_step": 900000, "rew": 18.020000171661376, "rew_std": 4.536915203735637, "Agent": "ppo"}, {"env_step": 1000000, "rew": 17.020000171661376, "rew_std": 4.455962464888489, "Agent": "ppo"}, {"env_step": 1100000, "rew": 20.690000343322755, "rew_std": 8.05238462382638, "Agent": "ppo"}, {"env_step": 1200000, "rew": 22.689999961853026, "rew_std": 12.304588638832776, "Agent": "ppo"}, {"env_step": 1300000, "rew": 20.55999975204468, "rew_std": 4.387299621179701, "Agent": "ppo"}, {"env_step": 1400000, "rew": 19.94000005722046, "rew_std": 4.290501381090037, "Agent": "ppo"}, {"env_step": 1500000, "rew": 26.379999923706055, "rew_std": 10.657748376402598, "Agent": "ppo"}, {"env_step": 1600000, "rew": 23.750000190734863, "rew_std": 4.385715448611054, "Agent": "ppo"}, {"env_step": 1700000, "rew": 23.659999656677247, "rew_std": 4.599826003557162, "Agent": "ppo"}, {"env_step": 1800000, "rew": 24.960000133514406, "rew_std": 4.907586145252241, "Agent": "ppo"}, {"env_step": 1900000, "rew": 27.629999732971193, "rew_std": 5.699657718554115, "Agent": "ppo"}, 
{"env_step": 2000000, "rew": 25.119999885559082, "rew_std": 2.981207745714502, "Agent": "ppo"}, {"env_step": 2100000, "rew": 24.629999923706055, "rew_std": 3.647204546502502, "Agent": "ppo"}, {"env_step": 2200000, "rew": 27.63999996185303, "rew_std": 3.726714114078771, "Agent": "ppo"}, {"env_step": 2300000, "rew": 28.329999923706055, "rew_std": 4.171102659408543, "Agent": "ppo"}, {"env_step": 2400000, "rew": 28.530000305175783, "rew_std": 3.5877709920260523, "Agent": "ppo"}, {"env_step": 2500000, "rew": 32.079999923706055, "rew_std": 4.9793168787475475, "Agent": "ppo"}, {"env_step": 2600000, "rew": 35.150000190734865, "rew_std": 6.757550860011463, "Agent": "ppo"}, {"env_step": 2700000, "rew": 35.24000015258789, "rew_std": 13.677953506506757, "Agent": "ppo"}, {"env_step": 2800000, "rew": 37.55, "rew_std": 9.215123739752018, "Agent": "ppo"}, {"env_step": 2900000, "rew": 36.089999771118165, "rew_std": 4.728942780362954, "Agent": "ppo"}, {"env_step": 3000000, "rew": 42.98000030517578, "rew_std": 19.57303269533935, "Agent": "ppo"}, {"env_step": 3100000, "rew": 37.360000419616696, "rew_std": 12.54338152629255, "Agent": "ppo"}, {"env_step": 3200000, "rew": 40.380000305175784, "rew_std": 10.776437119519981, "Agent": "ppo"}, {"env_step": 3300000, "rew": 38.110000419616696, "rew_std": 12.645825732034167, "Agent": "ppo"}, {"env_step": 3400000, "rew": 42.21999931335449, "rew_std": 9.1218199543614, "Agent": "ppo"}, {"env_step": 3500000, "rew": 50.119999694824216, "rew_std": 14.80836362588282, "Agent": "ppo"}, {"env_step": 3600000, "rew": 47.400000190734865, "rew_std": 10.068664307564164, "Agent": "ppo"}, {"env_step": 3700000, "rew": 45.4399995803833, "rew_std": 9.41564705130973, "Agent": "ppo"}, {"env_step": 3800000, "rew": 53.88000068664551, "rew_std": 16.57653842254833, "Agent": "ppo"}, {"env_step": 3900000, "rew": 61.64000015258789, "rew_std": 24.900129246081427, "Agent": "ppo"}, {"env_step": 4000000, "rew": 56.04999923706055, "rew_std": 14.419240647637906, "Agent": "ppo"}, 
{"env_step": 4100000, "rew": 55.7, "rew_std": 20.825513362350105, "Agent": "ppo"}, {"env_step": 4200000, "rew": 54.29000072479248, "rew_std": 21.086083955047158, "Agent": "ppo"}, {"env_step": 4300000, "rew": 60.97000007629394, "rew_std": 23.419352948854787, "Agent": "ppo"}, {"env_step": 4400000, "rew": 61.32000160217285, "rew_std": 24.429810367430697, "Agent": "ppo"}, {"env_step": 4500000, "rew": 62.31000022888183, "rew_std": 21.309597897936026, "Agent": "ppo"}, {"env_step": 4600000, "rew": 67.86999893188477, "rew_std": 26.248467597744845, "Agent": "ppo"}, {"env_step": 4700000, "rew": 66.7, "rew_std": 24.40237644035704, "Agent": "ppo"}, {"env_step": 4800000, "rew": 82.43000106811523, "rew_std": 39.014153045648264, "Agent": "ppo"}, {"env_step": 4900000, "rew": 82.50999946594239, "rew_std": 29.639110602215194, "Agent": "ppo"}, {"env_step": 5000000, "rew": 89.46999969482422, "rew_std": 36.290717341380685, "Agent": "ppo"}, {"env_step": 5100000, "rew": 75.40999946594238, "rew_std": 19.553385764445366, "Agent": "ppo"}, {"env_step": 5200000, "rew": 91.71999969482422, "rew_std": 33.941354984622286, "Agent": "ppo"}, {"env_step": 5300000, "rew": 95.90999984741211, "rew_std": 34.3326784841907, "Agent": "ppo"}, {"env_step": 5400000, "rew": 101.17000045776368, "rew_std": 48.307889695724214, "Agent": "ppo"}, {"env_step": 5500000, "rew": 93.73999977111816, "rew_std": 32.80829688751657, "Agent": "ppo"}, {"env_step": 5600000, "rew": 101.56000099182128, "rew_std": 38.65323253336432, "Agent": "ppo"}, {"env_step": 5700000, "rew": 119.71000213623047, "rew_std": 58.63711387604412, "Agent": "ppo"}, {"env_step": 5800000, "rew": 121.56000022888183, "rew_std": 52.67867263607598, "Agent": "ppo"}, {"env_step": 5900000, "rew": 128.3799991607666, "rew_std": 58.80482705685627, "Agent": "ppo"}, {"env_step": 6000000, "rew": 120.53999824523926, "rew_std": 48.01154252771422, "Agent": "ppo"}, {"env_step": 6100000, "rew": 122.3899990081787, "rew_std": 49.0965679103651, "Agent": "ppo"}, {"env_step": 
6200000, "rew": 133.99999961853027, "rew_std": 60.90436501831146, "Agent": "ppo"}, {"env_step": 6300000, "rew": 140.52000198364257, "rew_std": 69.07889716111299, "Agent": "ppo"}, {"env_step": 6400000, "rew": 137.21999893188476, "rew_std": 62.32881643967556, "Agent": "ppo"}, {"env_step": 6500000, "rew": 146.8299991607666, "rew_std": 55.37363915857669, "Agent": "ppo"}, {"env_step": 6600000, "rew": 139.81000213623048, "rew_std": 74.04937024552305, "Agent": "ppo"}, {"env_step": 6700000, "rew": 149.6099994659424, "rew_std": 71.83785305550174, "Agent": "ppo"}, {"env_step": 6800000, "rew": 135.49000091552733, "rew_std": 55.006754667730945, "Agent": "ppo"}, {"env_step": 6900000, "rew": 147.45999984741212, "rew_std": 59.91933138185291, "Agent": "ppo"}, {"env_step": 7000000, "rew": 165.1099994659424, "rew_std": 73.1342120883567, "Agent": "ppo"}, {"env_step": 7100000, "rew": 174.0099983215332, "rew_std": 66.24739028991273, "Agent": "ppo"}, {"env_step": 7200000, "rew": 177.29999847412108, "rew_std": 73.95540152011542, "Agent": "ppo"}, {"env_step": 7300000, "rew": 175.92999992370605, "rew_std": 76.0113898211075, "Agent": "ppo"}, {"env_step": 7400000, "rew": 164.42999877929688, "rew_std": 77.33506176445988, "Agent": "ppo"}, {"env_step": 7500000, "rew": 173.9599983215332, "rew_std": 84.44470542302061, "Agent": "ppo"}, {"env_step": 7600000, "rew": 171.43999900817872, "rew_std": 83.88048894373154, "Agent": "ppo"}, {"env_step": 7700000, "rew": 199.14999923706054, "rew_std": 94.75587743328266, "Agent": "ppo"}, {"env_step": 7800000, "rew": 186.2400001525879, "rew_std": 77.16537397952645, "Agent": "ppo"}, {"env_step": 7900000, "rew": 199.35, "rew_std": 79.58159406938299, "Agent": "ppo"}, {"env_step": 8000000, "rew": 210.05000190734864, "rew_std": 90.49879749595148, "Agent": "ppo"}, {"env_step": 8100000, "rew": 212.7500015258789, "rew_std": 88.85424210708028, "Agent": "ppo"}, {"env_step": 8200000, "rew": 210.8900001525879, "rew_std": 73.44615975377866, "Agent": "ppo"}, {"env_step": 
8300000, "rew": 219.910001373291, "rew_std": 100.7312432120665, "Agent": "ppo"}, {"env_step": 8400000, "rew": 237.1999984741211, "rew_std": 88.27977955944947, "Agent": "ppo"}, {"env_step": 8500000, "rew": 235.44000015258788, "rew_std": 82.60257787044871, "Agent": "ppo"}, {"env_step": 8600000, "rew": 217.410001373291, "rew_std": 82.26707245141274, "Agent": "ppo"}, {"env_step": 8700000, "rew": 239.08999938964843, "rew_std": 80.68150053430362, "Agent": "ppo"}, {"env_step": 8800000, "rew": 228.78000259399414, "rew_std": 78.50268738950463, "Agent": "ppo"}, {"env_step": 8900000, "rew": 242.50999908447267, "rew_std": 88.02333190391393, "Agent": "ppo"}, {"env_step": 9000000, "rew": 230.4200012207031, "rew_std": 98.88399222725009, "Agent": "ppo"}, {"env_step": 9100000, "rew": 254.71000213623046, "rew_std": 74.41610668817124, "Agent": "ppo"}, {"env_step": 9200000, "rew": 258.9599975585937, "rew_std": 88.94478371137306, "Agent": "ppo"}, {"env_step": 9300000, "rew": 248.64999694824218, "rew_std": 86.56067064571964, "Agent": "ppo"}, {"env_step": 9400000, "rew": 247.660001373291, "rew_std": 92.0610693574401, "Agent": "ppo"}, {"env_step": 9500000, "rew": 276.51000289916993, "rew_std": 81.18026107244279, "Agent": "ppo"}, {"env_step": 9600000, "rew": 258.3400054931641, "rew_std": 82.19079470003253, "Agent": "ppo"}, {"env_step": 9700000, "rew": 265.7800018310547, "rew_std": 71.35941141595737, "Agent": "ppo"}, {"env_step": 9800000, "rew": 279.3699981689453, "rew_std": 81.73140051743914, "Agent": "ppo"}, {"env_step": 9900000, "rew": 283.01000061035154, "rew_std": 74.29673322328118, "Agent": "ppo"}, {"env_step": 10000000, "rew": 272.84999923706056, "rew_std": 87.10014073751415, "Agent": "ppo"}]
examples/atari/benchmark/EnduroNoFrameskip-v4/result.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"env_step": 0, "rew": 0.0800000011920929, "rew_std": 0.24000000357627865, "Agent": "c51"}, {"env_step": 100000, "rew": 0.0, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 200000, "rew": 0.0, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 300000, "rew": 0.1, "rew_std": 0.30000000000000004, "Agent": "c51"}, {"env_step": 400000, "rew": 0.21000000238418579, "rew_std": 0.35623026856774925, "Agent": "c51"}, {"env_step": 500000, "rew": 0.05000000074505806, "rew_std": 0.12041594758226036, "Agent": "c51"}, {"env_step": 600000, "rew": 0.31999999061226847, "rew_std": 0.927146130289149, "Agent": "c51"}, {"env_step": 700000, "rew": 0.7299999989569187, "rew_std": 1.0354226172474177, "Agent": "c51"}, {"env_step": 800000, "rew": 29.999999809265137, "rew_std": 22.8517390602112, "Agent": "c51"}, {"env_step": 900000, "rew": 84.33000020980835, "rew_std": 38.11215200504169, "Agent": "c51"}, {"env_step": 1000000, "rew": 134.27000045776367, "rew_std": 33.76175580023914, "Agent": "c51"}, {"env_step": 1100000, "rew": 195.3500015258789, "rew_std": 55.070539310018276, "Agent": "c51"}, {"env_step": 1200000, "rew": 305.8000015258789, "rew_std": 63.623845163113806, "Agent": "c51"}, {"env_step": 1300000, "rew": 341.6900039672852, "rew_std": 75.15398169731095, "Agent": "c51"}, {"env_step": 1400000, "rew": 439.62000427246096, "rew_std": 35.53360923105612, "Agent": "c51"}, {"env_step": 1500000, "rew": 448.17999877929685, "rew_std": 63.31653151381171, "Agent": "c51"}, {"env_step": 1600000, "rew": 483.0900024414062, "rew_std": 63.598508175034475, "Agent": "c51"}, {"env_step": 1700000, "rew": 479.4499938964844, "rew_std": 60.719817637295435, "Agent": "c51"}, {"env_step": 1800000, "rew": 477.8299987792969, "rew_std": 88.50112258719895, "Agent": "c51"}, {"env_step": 1900000, "rew": 516.6200012207031, "rew_std": 90.65022983110897, "Agent": "c51"}, {"env_step": 2000000, "rew": 500.8199981689453, "rew_std": 56.8060386594065, "Agent": "c51"}, {"env_step": 2100000, "rew": 539.5200012207031, "rew_std": 
80.30648599564611, "Agent": "c51"}, {"env_step": 2200000, "rew": 592.730014038086, "rew_std": 96.93099005249192, "Agent": "c51"}, {"env_step": 2300000, "rew": 573.3399963378906, "rew_std": 91.49156422648889, "Agent": "c51"}, {"env_step": 2400000, "rew": 606.1099975585937, "rew_std": 68.63315724977679, "Agent": "c51"}, {"env_step": 2500000, "rew": 652.9599975585937, "rew_std": 85.58606896755197, "Agent": "c51"}, {"env_step": 2600000, "rew": 655.4, "rew_std": 81.90140506345105, "Agent": "c51"}, {"env_step": 2700000, "rew": 625.2999969482422, "rew_std": 85.18681205945472, "Agent": "c51"}, {"env_step": 2800000, "rew": 637.7900024414063, "rew_std": 122.54666760609061, "Agent": "c51"}, {"env_step": 2900000, "rew": 644.1, "rew_std": 85.42630507564513, "Agent": "c51"}, {"env_step": 3000000, "rew": 710.5900024414062, "rew_std": 75.28909772977295, "Agent": "c51"}, {"env_step": 3100000, "rew": 671.3799987792969, "rew_std": 56.7536538058374, "Agent": "c51"}, {"env_step": 3200000, "rew": 668.2599975585938, "rew_std": 65.81738352088684, "Agent": "c51"}, {"env_step": 3300000, "rew": 690.7899993896484, "rew_std": 84.55681861003309, "Agent": "c51"}, {"env_step": 3400000, "rew": 738.3400024414062, "rew_std": 101.42283074745313, "Agent": "c51"}, {"env_step": 3500000, "rew": 730.4300048828125, "rew_std": 90.93792521834833, "Agent": "c51"}, {"env_step": 3600000, "rew": 742.3700012207031, "rew_std": 98.71111614905327, "Agent": "c51"}, {"env_step": 3700000, "rew": 708.8199981689453, "rew_std": 130.55409140026177, "Agent": "c51"}, {"env_step": 3800000, "rew": 705.9700012207031, "rew_std": 75.86588955018891, "Agent": "c51"}, {"env_step": 3900000, "rew": 755.3899963378906, "rew_std": 111.82682749974384, "Agent": "c51"}, {"env_step": 4000000, "rew": 792.8599975585937, "rew_std": 100.65659388596904, "Agent": "c51"}, {"env_step": 4100000, "rew": 780.4700012207031, "rew_std": 89.4066144108773, "Agent": "c51"}, {"env_step": 4200000, "rew": 749.0600036621094, "rew_std": 89.14610115546856, 
"Agent": "c51"}, {"env_step": 4300000, "rew": 735.3100067138672, "rew_std": 105.2854315881353, "Agent": "c51"}, {"env_step": 4400000, "rew": 794.9, "rew_std": 94.90966229691644, "Agent": "c51"}, {"env_step": 4500000, "rew": 775.8700012207031, "rew_std": 86.57670898287867, "Agent": "c51"}, {"env_step": 4600000, "rew": 764.4599975585937, "rew_std": 121.75587907672723, "Agent": "c51"}, {"env_step": 4700000, "rew": 761.55, "rew_std": 62.056058351897406, "Agent": "c51"}, {"env_step": 4800000, "rew": 746.5799987792968, "rew_std": 130.65757158616265, "Agent": "c51"}, {"env_step": 4900000, "rew": 792.0400085449219, "rew_std": 92.14410963183664, "Agent": "c51"}, {"env_step": 5000000, "rew": 784.8700012207031, "rew_std": 76.30846325078628, "Agent": "c51"}, {"env_step": 5100000, "rew": 806.9400024414062, "rew_std": 89.7122211065443, "Agent": "c51"}, {"env_step": 5200000, "rew": 809.7999938964844, "rew_std": 96.43909531778648, "Agent": "c51"}, {"env_step": 5300000, "rew": 801.8500061035156, "rew_std": 71.23311713783924, "Agent": "c51"}, {"env_step": 5400000, "rew": 865.3399963378906, "rew_std": 87.65904211044649, "Agent": "c51"}, {"env_step": 5500000, "rew": 800.1799987792969, "rew_std": 120.17872453215695, "Agent": "c51"}, {"env_step": 5600000, "rew": 808.1899963378906, "rew_std": 114.39077140355394, "Agent": "c51"}, {"env_step": 5700000, "rew": 787.6900024414062, "rew_std": 137.37429836391115, "Agent": "c51"}, {"env_step": 5800000, "rew": 817.6800048828125, "rew_std": 75.91765398683945, "Agent": "c51"}, {"env_step": 5900000, "rew": 788.95, "rew_std": 112.15058353811531, "Agent": "c51"}, {"env_step": 6000000, "rew": 824.4900024414062, "rew_std": 83.1460697372063, "Agent": "c51"}, {"env_step": 6100000, "rew": 791.3400024414062, "rew_std": 71.07509644467352, "Agent": "c51"}, {"env_step": 6200000, "rew": 852.5599975585938, "rew_std": 102.26750645543113, "Agent": "c51"}, {"env_step": 6300000, "rew": 791.0799957275391, "rew_std": 148.64477463928537, "Agent": "c51"}, {"env_step": 
6400000, "rew": 799.7700012207031, "rew_std": 94.79903258127105, "Agent": "c51"}, {"env_step": 6500000, "rew": 856.9799987792969, "rew_std": 98.21994199980757, "Agent": "c51"}, {"env_step": 6600000, "rew": 830.3799987792969, "rew_std": 92.69931723386792, "Agent": "c51"}, {"env_step": 6700000, "rew": 828.5700012207031, "rew_std": 117.4840294040638, "Agent": "c51"}, {"env_step": 6800000, "rew": 836.3399963378906, "rew_std": 107.67462291584121, "Agent": "c51"}, {"env_step": 6900000, "rew": 809.05, "rew_std": 86.39843789136866, "Agent": "c51"}, {"env_step": 7000000, "rew": 802.610009765625, "rew_std": 85.19054082178417, "Agent": "c51"}, {"env_step": 7100000, "rew": 821.9500061035156, "rew_std": 84.06559770155239, "Agent": "c51"}, {"env_step": 7200000, "rew": 846.0100036621094, "rew_std": 109.6511176842256, "Agent": "c51"}, {"env_step": 7300000, "rew": 753.2699981689453, "rew_std": 118.18144461063531, "Agent": "c51"}, {"env_step": 7400000, "rew": 862.4699951171875, "rew_std": 94.61919535944561, "Agent": "c51"}, {"env_step": 7500000, "rew": 855.6, "rew_std": 83.0109175169287, "Agent": "c51"}, {"env_step": 7600000, "rew": 804.8099975585938, "rew_std": 87.84749020213859, "Agent": "c51"}, {"env_step": 7700000, "rew": 879.1099975585937, "rew_std": 124.99047443353965, "Agent": "c51"}, {"env_step": 7800000, "rew": 861.0999938964844, "rew_std": 149.99051826966058, "Agent": "c51"}, {"env_step": 7900000, "rew": 840.1100158691406, "rew_std": 74.45142853212236, "Agent": "c51"}, {"env_step": 8000000, "rew": 838.1, "rew_std": 81.488879742754, "Agent": "c51"}, {"env_step": 8100000, "rew": 853.1600036621094, "rew_std": 125.11934723061991, "Agent": "c51"}, {"env_step": 8200000, "rew": 883.8700012207031, "rew_std": 110.8634912939346, "Agent": "c51"}, {"env_step": 8300000, "rew": 843.6199951171875, "rew_std": 110.07341220660334, "Agent": "c51"}, {"env_step": 8400000, "rew": 833.9500122070312, "rew_std": 166.85491541704317, "Agent": "c51"}, {"env_step": 8500000, "rew": 883.5900085449218, 
"rew_std": 120.01639086371644, "Agent": "c51"}, {"env_step": 8600000, "rew": 816.3799987792969, "rew_std": 93.30947260374052, "Agent": "c51"}, {"env_step": 8700000, "rew": 816.5599975585938, "rew_std": 90.72039308816316, "Agent": "c51"}, {"env_step": 8800000, "rew": 885.2699951171875, "rew_std": 84.88027385527451, "Agent": "c51"}, {"env_step": 8900000, "rew": 940.8200012207031, "rew_std": 133.91121568644473, "Agent": "c51"}, {"env_step": 9000000, "rew": 858.2299926757812, "rew_std": 106.44362694927028, "Agent": "c51"}, {"env_step": 9100000, "rew": 843.5200012207031, "rew_std": 68.82380650364031, "Agent": "c51"}, {"env_step": 9200000, "rew": 847.75, "rew_std": 129.47580625363784, "Agent": "c51"}, {"env_step": 9300000, "rew": 898.089990234375, "rew_std": 74.57461814213535, "Agent": "c51"}, {"env_step": 9400000, "rew": 837.9999938964844, "rew_std": 75.13513732170806, "Agent": "c51"}, {"env_step": 9500000, "rew": 900.9, "rew_std": 78.32506786062443, "Agent": "c51"}, {"env_step": 9600000, "rew": 832.4200134277344, "rew_std": 69.69465721794408, "Agent": "c51"}, {"env_step": 9700000, "rew": 889.0700012207031, "rew_std": 126.08315874519296, "Agent": "c51"}, {"env_step": 9800000, "rew": 873.6100036621094, "rew_std": 72.68277945780702, "Agent": "c51"}, {"env_step": 9900000, "rew": 796.860009765625, "rew_std": 83.10054074269745, "Agent": "c51"}, {"env_step": 10000000, "rew": 821.8199951171875, "rew_std": 135.73986691667577, "Agent": "c51"}, {"env_step": 0, "rew": 0.010000000149011612, "rew_std": 0.03000000044703483, "Agent": "dqn"}, {"env_step": 100000, "rew": 0.28999999687075617, "rew_std": 0.6774215679654139, "Agent": "dqn"}, {"env_step": 200000, "rew": 0.44000001028180125, "rew_std": 0.8392854380207859, "Agent": "dqn"}, {"env_step": 300000, "rew": 0.3599999964237213, "rew_std": 0.8284925931025058, "Agent": "dqn"}, {"env_step": 400000, "rew": 0.33999999538064, "rew_std": 0.6873135967539007, "Agent": "dqn"}, {"env_step": 500000, "rew": 0.11000000089406967, "rew_std": 
0.18138357257301754, "Agent": "dqn"}, {"env_step": 600000, "rew": 0.2100000001490116, "rew_std": 0.4548626165857307, "Agent": "dqn"}, {"env_step": 700000, "rew": 8.389999697357416, "rew_std": 23.108285476400564, "Agent": "dqn"}, {"env_step": 800000, "rew": 98.94999904632569, "rew_std": 35.660067729885725, "Agent": "dqn"}, {"env_step": 900000, "rew": 164.40999908447264, "rew_std": 59.0228520750357, "Agent": "dqn"}, {"env_step": 1000000, "rew": 210.4000030517578, "rew_std": 57.88502705780608, "Agent": "dqn"}, {"env_step": 1100000, "rew": 300.38000030517577, "rew_std": 82.23668084181749, "Agent": "dqn"}, {"env_step": 1200000, "rew": 302.7600036621094, "rew_std": 103.46174434569636, "Agent": "dqn"}, {"env_step": 1300000, "rew": 433.91000213623045, "rew_std": 126.5024948103054, "Agent": "dqn"}, {"env_step": 1400000, "rew": 416.14000244140624, "rew_std": 145.09603673682304, "Agent": "dqn"}, {"env_step": 1500000, "rew": 472.1600006103516, "rew_std": 86.80930065353702, "Agent": "dqn"}, {"env_step": 1600000, "rew": 536.65, "rew_std": 132.84745000480228, "Agent": "dqn"}, {"env_step": 1700000, "rew": 511.6300048828125, "rew_std": 113.56259400514695, "Agent": "dqn"}, {"env_step": 1800000, "rew": 559.5200012207031, "rew_std": 95.26979029881092, "Agent": "dqn"}, {"env_step": 1900000, "rew": 504.64000244140624, "rew_std": 183.2878214612098, "Agent": "dqn"}, {"env_step": 2000000, "rew": 574.7700103759765, "rew_std": 78.04244399982899, "Agent": "dqn"}, {"env_step": 2100000, "rew": 531.539998626709, "rew_std": 230.03735295552605, "Agent": "dqn"}, {"env_step": 2200000, "rew": 584.3100006103516, "rew_std": 116.97556069856971, "Agent": "dqn"}, {"env_step": 2300000, "rew": 609.7900024414063, "rew_std": 76.25380786006397, "Agent": "dqn"}, {"env_step": 2400000, "rew": 601.6499938964844, "rew_std": 156.2863846625289, "Agent": "dqn"}, {"env_step": 2500000, "rew": 614.8899978637695, "rew_std": 188.88530877823078, "Agent": "dqn"}, {"env_step": 2600000, "rew": 585.7100036621093, "rew_std": 
183.29359497607433, "Agent": "dqn"}, {"env_step": 2700000, "rew": 681.8800048828125, "rew_std": 179.32764210720728, "Agent": "dqn"}, {"env_step": 2800000, "rew": 593.8799987792969, "rew_std": 178.6578949602182, "Agent": "dqn"}, {"env_step": 2900000, "rew": 685.2, "rew_std": 118.27089032922618, "Agent": "dqn"}, {"env_step": 3000000, "rew": 683.2299926757812, "rew_std": 131.34903655399373, "Agent": "dqn"}, {"env_step": 3100000, "rew": 662.1999938964843, "rew_std": 109.7032239820281, "Agent": "dqn"}, {"env_step": 3200000, "rew": 701.2099975585937, "rew_std": 88.04874320833147, "Agent": "dqn"}, {"env_step": 3300000, "rew": 688.9199981689453, "rew_std": 126.02541709175058, "Agent": "dqn"}, {"env_step": 3400000, "rew": 636.0799987792968, "rew_std": 172.2193654656943, "Agent": "dqn"}, {"env_step": 3500000, "rew": 653.7300018310547, "rew_std": 166.93894277813533, "Agent": "dqn"}, {"env_step": 3600000, "rew": 684.3899963378906, "rew_std": 172.87680391908185, "Agent": "dqn"}, {"env_step": 3700000, "rew": 643.3400039672852, "rew_std": 180.69860945189737, "Agent": "dqn"}, {"env_step": 3800000, "rew": 601.6399993896484, "rew_std": 220.07335285006258, "Agent": "dqn"}, {"env_step": 3900000, "rew": 787.8099914550781, "rew_std": 150.5371983171373, "Agent": "dqn"}, {"env_step": 4000000, "rew": 709.3800048828125, "rew_std": 144.69985159836313, "Agent": "dqn"}, {"env_step": 4100000, "rew": 764.1300018310546, "rew_std": 195.3602195608862, "Agent": "dqn"}, {"env_step": 4200000, "rew": 680.2700012207031, "rew_std": 210.1577440975007, "Agent": "dqn"}, {"env_step": 4300000, "rew": 705.5600036621094, "rew_std": 216.64282568222822, "Agent": "dqn"}, {"env_step": 4400000, "rew": 808.1700073242188, "rew_std": 171.82676781938977, "Agent": "dqn"}, {"env_step": 4500000, "rew": 715.6900006294251, "rew_std": 325.7469276625226, "Agent": "dqn"}, {"env_step": 4600000, "rew": 732.2800018310547, "rew_std": 201.57531656345432, "Agent": "dqn"}, {"env_step": 4700000, "rew": 786.0399963378907, "rew_std": 
166.10157244455863, "Agent": "dqn"}, {"env_step": 4800000, "rew": 786.3699920654296, "rew_std": 178.73539138167627, "Agent": "dqn"}, {"env_step": 4900000, "rew": 775.7399993896485, "rew_std": 224.3126871078825, "Agent": "dqn"}, {"env_step": 5000000, "rew": 837.7899932861328, "rew_std": 153.09056362847045, "Agent": "dqn"}, {"env_step": 5100000, "rew": 830.0400085449219, "rew_std": 160.10900182067942, "Agent": "dqn"}, {"env_step": 5200000, "rew": 823.5699981689453, "rew_std": 194.98995547995364, "Agent": "dqn"}, {"env_step": 5300000, "rew": 855.3900024414063, "rew_std": 140.50763881053481, "Agent": "dqn"}, {"env_step": 5400000, "rew": 894.0799865722656, "rew_std": 140.23513752874317, "Agent": "dqn"}, {"env_step": 5500000, "rew": 833.1599899291992, "rew_std": 252.01737545461143, "Agent": "dqn"}, {"env_step": 5600000, "rew": 810.3499969482422, "rew_std": 203.2235554729935, "Agent": "dqn"}, {"env_step": 5700000, "rew": 725.3200134277344, "rew_std": 319.200193375417, "Agent": "dqn"}, {"env_step": 5800000, "rew": 766.3399963378906, "rew_std": 245.59766336856708, "Agent": "dqn"}, {"env_step": 5900000, "rew": 824.45, "rew_std": 157.8227175018482, "Agent": "dqn"}, {"env_step": 6000000, "rew": 839.1499938964844, "rew_std": 267.6931744720739, "Agent": "dqn"}, {"env_step": 6100000, "rew": 911.7200012207031, "rew_std": 149.13539534554963, "Agent": "dqn"}, {"env_step": 6200000, "rew": 865.1299987792969, "rew_std": 151.98633776803706, "Agent": "dqn"}, {"env_step": 6300000, "rew": 701.3800109863281, "rew_std": 231.76907968297863, "Agent": "dqn"}, {"env_step": 6400000, "rew": 848.5299987792969, "rew_std": 142.20331551456655, "Agent": "dqn"}, {"env_step": 6500000, "rew": 857.1699829101562, "rew_std": 245.273054002329, "Agent": "dqn"}, {"env_step": 6600000, "rew": 872.0099975585938, "rew_std": 275.1938175427823, "Agent": "dqn"}, {"env_step": 6700000, "rew": 780.0400024414063, "rew_std": 218.67946501129768, "Agent": "dqn"}, {"env_step": 6800000, "rew": 972.5299926757813, "rew_std": 
84.68238747454869, "Agent": "dqn"}, {"env_step": 6900000, "rew": 839.6300048828125, "rew_std": 172.06233073457074, "Agent": "dqn"}, {"env_step": 7000000, "rew": 765.7000030517578, "rew_std": 228.73327046919016, "Agent": "dqn"}, {"env_step": 7100000, "rew": 803.6699981689453, "rew_std": 223.20593976154984, "Agent": "dqn"}, {"env_step": 7200000, "rew": 869.5399948120117, "rew_std": 296.05559318233026, "Agent": "dqn"}, {"env_step": 7300000, "rew": 899.0700073242188, "rew_std": 223.90129458785196, "Agent": "dqn"}, {"env_step": 7400000, "rew": 894.3500122070312, "rew_std": 124.86718242625085, "Agent": "dqn"}, {"env_step": 7500000, "rew": 844.4800048828125, "rew_std": 178.28963708114927, "Agent": "dqn"}, {"env_step": 7600000, "rew": 832.0400024414063, "rew_std": 169.4274077668312, "Agent": "dqn"}, {"env_step": 7700000, "rew": 792.3100036621094, "rew_std": 230.68153832883345, "Agent": "dqn"}, {"env_step": 7800000, "rew": 803.5799963474274, "rew_std": 309.8821414828576, "Agent": "dqn"}, {"env_step": 7900000, "rew": 673.1000030517578, "rew_std": 227.6657908497509, "Agent": "dqn"}, {"env_step": 8000000, "rew": 902.8799987792969, "rew_std": 131.25283923863736, "Agent": "dqn"}, {"env_step": 8100000, "rew": 724.4699996948242, "rew_std": 295.4643426314042, "Agent": "dqn"}, {"env_step": 8200000, "rew": 927.2100036621093, "rew_std": 143.6894016940865, "Agent": "dqn"}, {"env_step": 8300000, "rew": 942.3600006103516, "rew_std": 285.8560329981964, "Agent": "dqn"}, {"env_step": 8400000, "rew": 851.1799987792969, "rew_std": 200.8229014095921, "Agent": "dqn"}, {"env_step": 8500000, "rew": 901.9700012207031, "rew_std": 160.8610738450723, "Agent": "dqn"}, {"env_step": 8600000, "rew": 871.3400024414062, "rew_std": 190.1946167222594, "Agent": "dqn"}, {"env_step": 8700000, "rew": 833.9000122070313, "rew_std": 240.40292224599622, "Agent": "dqn"}, {"env_step": 8800000, "rew": 869.95, "rew_std": 184.55665785734556, "Agent": "dqn"}, {"env_step": 8900000, "rew": 875.4000030517578, "rew_std": 
285.9085977204755, "Agent": "dqn"}, {"env_step": 9000000, "rew": 867.3599975585937, "rew_std": 361.56061577070335, "Agent": "dqn"}, {"env_step": 9100000, "rew": 856.3100036621094, "rew_std": 315.8472236982946, "Agent": "dqn"}, {"env_step": 9200000, "rew": 856.9499938964843, "rew_std": 170.14471559957494, "Agent": "dqn"}, {"env_step": 9300000, "rew": 888.9199951171875, "rew_std": 168.72139042881585, "Agent": "dqn"}, {"env_step": 9400000, "rew": 866.0400131225585, "rew_std": 299.502088920062, "Agent": "dqn"}, {"env_step": 9500000, "rew": 840.8400024414062, "rew_std": 331.5126113915152, "Agent": "dqn"}, {"env_step": 9600000, "rew": 807.6199981689454, "rew_std": 295.85304518327024, "Agent": "dqn"}, {"env_step": 9700000, "rew": 997.8700073242187, "rew_std": 180.62872163215707, "Agent": "dqn"}, {"env_step": 9800000, "rew": 902.3699951171875, "rew_std": 191.78846507427704, "Agent": "dqn"}, {"env_step": 9900000, "rew": 832.2099884033203, "rew_std": 279.22019714027266, "Agent": "dqn"}, {"env_step": 10000000, "rew": 833.1200012207031, "rew_std": 180.39020450524953, "Agent": "dqn"}, {"env_step": 0, "rew": 0.010000000149011612, "rew_std": 0.03000000044703483, "Agent": "fqf"}, {"env_step": 100000, "rew": 0.4000000074505806, "rew_std": 0.5949790043492437, "Agent": "fqf"}, {"env_step": 200000, "rew": 0.5199999868869781, "rew_std": 0.9031057265445962, "Agent": "fqf"}, {"env_step": 300000, "rew": 0.020000000298023225, "rew_std": 0.04000000059604644, "Agent": "fqf"}, {"env_step": 400000, "rew": 0.1600000001490116, "rew_std": 0.4476605856920158, "Agent": "fqf"}, {"env_step": 500000, "rew": 0.06999999880790711, "rew_std": 0.2099999964237213, "Agent": "fqf"}, {"env_step": 600000, "rew": 3.8100000239908693, "rew_std": 5.602936729313963, "Agent": "fqf"}, {"env_step": 700000, "rew": 12.430000038444996, "rew_std": 19.787372405641634, "Agent": "fqf"}, {"env_step": 800000, "rew": 85.2499984741211, "rew_std": 63.42670047163025, "Agent": "fqf"}, {"env_step": 900000, "rew": 133.19000046253205, 
"rew_std": 70.73937261722692, "Agent": "fqf"}, {"env_step": 1000000, "rew": 305.75999908447267, "rew_std": 103.32499176000272, "Agent": "fqf"}, {"env_step": 1100000, "rew": 413.0800033569336, "rew_std": 109.4271524578977, "Agent": "fqf"}, {"env_step": 1200000, "rew": 447.47999572753906, "rew_std": 130.462502495816, "Agent": "fqf"}, {"env_step": 1300000, "rew": 518.3400054931641, "rew_std": 135.58270493310644, "Agent": "fqf"}, {"env_step": 1400000, "rew": 657.4699951171875, "rew_std": 146.84401743315414, "Agent": "fqf"}, {"env_step": 1500000, "rew": 748.0899963378906, "rew_std": 115.51613589716597, "Agent": "fqf"}, {"env_step": 1600000, "rew": 675.2199920654297, "rew_std": 152.8333391660496, "Agent": "fqf"}, {"env_step": 1700000, "rew": 711.7800109863281, "rew_std": 140.070698003054, "Agent": "fqf"}, {"env_step": 1800000, "rew": 768.7699981689453, "rew_std": 205.88203217918607, "Agent": "fqf"}, {"env_step": 1900000, "rew": 830.1599975585938, "rew_std": 164.96536198083228, "Agent": "fqf"}, {"env_step": 2000000, "rew": 838.0, "rew_std": 134.02208935127933, "Agent": "fqf"}, {"env_step": 2100000, "rew": 860.7899963378907, "rew_std": 130.38507918675546, "Agent": "fqf"}, {"env_step": 2200000, "rew": 977.0700073242188, "rew_std": 126.20982209871863, "Agent": "fqf"}, {"env_step": 2300000, "rew": 950.0299987792969, "rew_std": 156.14618566531715, "Agent": "fqf"}, {"env_step": 2400000, "rew": 846.3499877929687, "rew_std": 248.7892251417476, "Agent": "fqf"}, {"env_step": 2500000, "rew": 992.1199951171875, "rew_std": 115.97767285528869, "Agent": "fqf"}, {"env_step": 2600000, "rew": 987.1100036621094, "rew_std": 102.14894278448398, "Agent": "fqf"}, {"env_step": 2700000, "rew": 1004.62001953125, "rew_std": 195.0578605917069, "Agent": "fqf"}, {"env_step": 2800000, "rew": 972.389990234375, "rew_std": 155.29393383794988, "Agent": "fqf"}, {"env_step": 2900000, "rew": 948.7399932861329, "rew_std": 274.68796913449233, "Agent": "fqf"}, {"env_step": 3000000, "rew": 1058.3299926757813, 
"rew_std": 170.51883401478082, "Agent": "fqf"}, {"env_step": 3100000, "rew": 1085.8400024414063, "rew_std": 143.12279720739653, "Agent": "fqf"}, {"env_step": 3200000, "rew": 1149.7299865722657, "rew_std": 218.88763506679157, "Agent": "fqf"}, {"env_step": 3300000, "rew": 1151.3599914550782, "rew_std": 190.10523364130052, "Agent": "fqf"}, {"env_step": 3400000, "rew": 1080.9000122070313, "rew_std": 332.21089689623733, "Agent": "fqf"}, {"env_step": 3500000, "rew": 1113.7200073242188, "rew_std": 122.5017752266977, "Agent": "fqf"}, {"env_step": 3600000, "rew": 1153.9199951171875, "rew_std": 110.683608376239, "Agent": "fqf"}, {"env_step": 3700000, "rew": 1151.8500061035156, "rew_std": 253.38377103265688, "Agent": "fqf"}, {"env_step": 3800000, "rew": 1277.9900146484374, "rew_std": 112.66564272737654, "Agent": "fqf"}, {"env_step": 3900000, "rew": 1282.3099731445313, "rew_std": 217.32348002992126, "Agent": "fqf"}, {"env_step": 4000000, "rew": 1248.0900024414063, "rew_std": 170.67968176451825, "Agent": "fqf"}, {"env_step": 4100000, "rew": 1322.8300170898438, "rew_std": 153.41429245269322, "Agent": "fqf"}, {"env_step": 4200000, "rew": 1236.4299896240234, "rew_std": 308.0954582465107, "Agent": "fqf"}, {"env_step": 4300000, "rew": 1342.439990234375, "rew_std": 193.51426337682267, "Agent": "fqf"}, {"env_step": 4400000, "rew": 1149.9899963378907, "rew_std": 244.50325562855912, "Agent": "fqf"}, {"env_step": 4500000, "rew": 1353.7800048828126, "rew_std": 228.50535375228858, "Agent": "fqf"}, {"env_step": 4600000, "rew": 1196.4300048828125, "rew_std": 221.77920221529982, "Agent": "fqf"}, {"env_step": 4700000, "rew": 1338.169989013672, "rew_std": 258.7844697757098, "Agent": "fqf"}, {"env_step": 4800000, "rew": 1455.7900146484376, "rew_std": 222.74215426809056, "Agent": "fqf"}, {"env_step": 4900000, "rew": 1465.7900024414062, "rew_std": 211.17771615384086, "Agent": "fqf"}, {"env_step": 5000000, "rew": 1360.9300048828125, "rew_std": 158.9558026017405, "Agent": "fqf"}, {"env_step": 
5100000, "rew": 1326.210009765625, "rew_std": 153.008085562372, "Agent": "fqf"}, {"env_step": 5200000, "rew": 1324.9800048828124, "rew_std": 192.30021302076855, "Agent": "fqf"}, {"env_step": 5300000, "rew": 1373.3199951171875, "rew_std": 190.05878674516413, "Agent": "fqf"}, {"env_step": 5400000, "rew": 1444.8299926757813, "rew_std": 176.63967297772865, "Agent": "fqf"}, {"env_step": 5500000, "rew": 1380.8700012207032, "rew_std": 275.76877411799353, "Agent": "fqf"}, {"env_step": 5600000, "rew": 1449.3299926757813, "rew_std": 189.39536052457436, "Agent": "fqf"}, {"env_step": 5700000, "rew": 1399.7899780273438, "rew_std": 186.23228867562835, "Agent": "fqf"}, {"env_step": 5800000, "rew": 1526.0900024414063, "rew_std": 226.097034699169, "Agent": "fqf"}, {"env_step": 5900000, "rew": 1323.17001953125, "rew_std": 214.75273220122753, "Agent": "fqf"}, {"env_step": 6000000, "rew": 1335.5700073242188, "rew_std": 199.16455799287937, "Agent": "fqf"}, {"env_step": 6100000, "rew": 1455.260009765625, "rew_std": 188.3336061850183, "Agent": "fqf"}, {"env_step": 6200000, "rew": 1460.6299926757813, "rew_std": 168.61568012736888, "Agent": "fqf"}, {"env_step": 6300000, "rew": 1531.5299926757812, "rew_std": 223.6121634917823, "Agent": "fqf"}, {"env_step": 6400000, "rew": 1473.3, "rew_std": 157.68532141529403, "Agent": "fqf"}, {"env_step": 6500000, "rew": 1348.3100036621095, "rew_std": 336.5419653083397, "Agent": "fqf"}, {"env_step": 6600000, "rew": 1360.3900024414063, "rew_std": 385.11999323091857, "Agent": "fqf"}, {"env_step": 6700000, "rew": 1525.3900024414063, "rew_std": 223.73438160213453, "Agent": "fqf"}, {"env_step": 6800000, "rew": 1424.3700134277344, "rew_std": 227.2740165319224, "Agent": "fqf"}, {"env_step": 6900000, "rew": 1444.5199951171876, "rew_std": 206.8632128076105, "Agent": "fqf"}, {"env_step": 7000000, "rew": 1550.7000244140625, "rew_std": 243.50839787358856, "Agent": "fqf"}, {"env_step": 7100000, "rew": 1510.7899963378907, "rew_std": 258.7483723817995, "Agent": "fqf"}, 
{"env_step": 7200000, "rew": 1483.280010986328, "rew_std": 264.7287475532349, "Agent": "fqf"}, {"env_step": 7300000, "rew": 1499.6600219726563, "rew_std": 341.719842324931, "Agent": "fqf"}, {"env_step": 7400000, "rew": 1687.2500244140624, "rew_std": 256.1505463645012, "Agent": "fqf"}, {"env_step": 7500000, "rew": 1454.0300048828126, "rew_std": 280.16069877794627, "Agent": "fqf"}, {"env_step": 7600000, "rew": 1593.1700012207032, "rew_std": 356.6653348908056, "Agent": "fqf"}, {"env_step": 7700000, "rew": 1752.35, "rew_std": 272.67251651423953, "Agent": "fqf"}, {"env_step": 7800000, "rew": 1424.1700073242187, "rew_std": 240.21965983357364, "Agent": "fqf"}, {"env_step": 7900000, "rew": 1545.0499877929688, "rew_std": 274.2309684007817, "Agent": "fqf"}, {"env_step": 8000000, "rew": 1491.2900024414062, "rew_std": 221.5089150627939, "Agent": "fqf"}, {"env_step": 8100000, "rew": 1686.5300170898438, "rew_std": 233.44947282580907, "Agent": "fqf"}, {"env_step": 8200000, "rew": 1654.559991455078, "rew_std": 257.67832128859897, "Agent": "fqf"}, {"env_step": 8300000, "rew": 1608.6599975585937, "rew_std": 238.0655354689683, "Agent": "fqf"}, {"env_step": 8400000, "rew": 1575.3399780273437, "rew_std": 203.41885145877984, "Agent": "fqf"}, {"env_step": 8500000, "rew": 1501.899984741211, "rew_std": 443.3008682851743, "Agent": "fqf"}, {"env_step": 8600000, "rew": 1344.8300048828125, "rew_std": 226.3636211692771, "Agent": "fqf"}, {"env_step": 8700000, "rew": 1358.25, "rew_std": 207.60759860147468, "Agent": "fqf"}, {"env_step": 8800000, "rew": 1577.8999877929687, "rew_std": 218.78633687041156, "Agent": "fqf"}, {"env_step": 8900000, "rew": 1816.8199951171875, "rew_std": 314.3398063009122, "Agent": "fqf"}, {"env_step": 9000000, "rew": 1508.2200073242188, "rew_std": 202.8977505176873, "Agent": "fqf"}, {"env_step": 9100000, "rew": 1388.3, "rew_std": 358.01253786434296, "Agent": "fqf"}, {"env_step": 9200000, "rew": 1657.6999877929688, "rew_std": 162.50743736286006, "Agent": "fqf"}, 
{"env_step": 9300000, "rew": 1769.5699829101563, "rew_std": 430.9407204209698, "Agent": "fqf"}, {"env_step": 9400000, "rew": 1644.0599975585938, "rew_std": 363.31351881469755, "Agent": "fqf"}, {"env_step": 9500000, "rew": 1774.7999755859375, "rew_std": 458.10963716513834, "Agent": "fqf"}, {"env_step": 9600000, "rew": 1574.6399780273437, "rew_std": 286.1478292951951, "Agent": "fqf"}, {"env_step": 9700000, "rew": 1621.3900024414063, "rew_std": 203.88568091812692, "Agent": "fqf"}, {"env_step": 9800000, "rew": 1800.6699829101562, "rew_std": 246.23150505646822, "Agent": "fqf"}, {"env_step": 9900000, "rew": 1717.560009765625, "rew_std": 272.92596987574973, "Agent": "fqf"}, {"env_step": 10000000, "rew": 1663.030029296875, "rew_std": 215.58594858353038, "Agent": "fqf"}, {"env_step": 0, "rew": 0.2, "rew_std": 0.43817804165122526, "Agent": "qrdqn"}, {"env_step": 100000, "rew": 2.950000001490116, "rew_std": 8.683806768461812, "Agent": "qrdqn"}, {"env_step": 200000, "rew": 2.750000011920929, "rew_std": 7.12281545355883, "Agent": "qrdqn"}, {"env_step": 300000, "rew": 2.139999923855066, "rew_std": 6.060560801476709, "Agent": "qrdqn"}, {"env_step": 400000, "rew": 8.719999969005585, "rew_std": 12.750513633943633, "Agent": "qrdqn"}, {"env_step": 500000, "rew": 0.020000000298023225, "rew_std": 0.06000000089406966, "Agent": "qrdqn"}, {"env_step": 600000, "rew": 4.539999961853027, "rew_std": 8.518591324669725, "Agent": "qrdqn"}, {"env_step": 700000, "rew": 20.8, "rew_std": 19.17268910037082, "Agent": "qrdqn"}, {"env_step": 800000, "rew": 64.31000022888183, "rew_std": 55.11154952769602, "Agent": "qrdqn"}, {"env_step": 900000, "rew": 117.36000137329101, "rew_std": 81.42853758823472, "Agent": "qrdqn"}, {"env_step": 1000000, "rew": 212.5300022125244, "rew_std": 134.6447147733004, "Agent": "qrdqn"}, {"env_step": 1100000, "rew": 311.71999740600586, "rew_std": 143.0127312672216, "Agent": "qrdqn"}, {"env_step": 1200000, "rew": 427.21999702453616, "rew_std": 166.6607249218663, "Agent": 
"qrdqn"}, {"env_step": 1300000, "rew": 450.81999626159666, "rew_std": 170.6163965376507, "Agent": "qrdqn"}, {"env_step": 1400000, "rew": 446.6700017929077, "rew_std": 171.28357388934182, "Agent": "qrdqn"}, {"env_step": 1500000, "rew": 501.95000114440916, "rew_std": 214.17296736360868, "Agent": "qrdqn"}, {"env_step": 1600000, "rew": 515.3500011444091, "rew_std": 228.24397876121898, "Agent": "qrdqn"}, {"env_step": 1700000, "rew": 532.7900005340576, "rew_std": 260.70330697040697, "Agent": "qrdqn"}, {"env_step": 1800000, "rew": 535.5499984741211, "rew_std": 226.46023524425055, "Agent": "qrdqn"}, {"env_step": 1900000, "rew": 614.3999969482422, "rew_std": 229.86442537632706, "Agent": "qrdqn"}, {"env_step": 2000000, "rew": 495.1, "rew_std": 277.79406604885946, "Agent": "qrdqn"}, {"env_step": 2100000, "rew": 585.6600044250488, "rew_std": 230.43919543285637, "Agent": "qrdqn"}, {"env_step": 2200000, "rew": 716.0299999237061, "rew_std": 264.0099569711277, "Agent": "qrdqn"}, {"env_step": 2300000, "rew": 542.9899975776673, "rew_std": 303.6674555033, "Agent": "qrdqn"}, {"env_step": 2400000, "rew": 715.6400062561036, "rew_std": 286.50699130558786, "Agent": "qrdqn"}, {"env_step": 2500000, "rew": 648.0000051498413, "rew_std": 301.31887025376795, "Agent": "qrdqn"}, {"env_step": 2600000, "rew": 593.8800014495849, "rew_std": 302.4725858413253, "Agent": "qrdqn"}, {"env_step": 2700000, "rew": 662.1199962615967, "rew_std": 292.98960962786776, "Agent": "qrdqn"}, {"env_step": 2800000, "rew": 695.9800054550171, "rew_std": 284.20259753123815, "Agent": "qrdqn"}, {"env_step": 2900000, "rew": 729.7400060653687, "rew_std": 261.0564239167806, "Agent": "qrdqn"}, {"env_step": 3000000, "rew": 765.3200073242188, "rew_std": 267.8584904904683, "Agent": "qrdqn"}, {"env_step": 3100000, "rew": 763.0400096893311, "rew_std": 257.2424218386392, "Agent": "qrdqn"}, {"env_step": 3200000, "rew": 775.7299938201904, "rew_std": 273.62065502831194, "Agent": "qrdqn"}, {"env_step": 3300000, "rew": 768.5900030136108, 
"rew_std": 293.28401432770056, "Agent": "qrdqn"}, {"env_step": 3400000, "rew": 736.4700023651124, "rew_std": 251.00099161324306, "Agent": "qrdqn"}, {"env_step": 3500000, "rew": 704.0100011825562, "rew_std": 279.16315078624314, "Agent": "qrdqn"}, {"env_step": 3600000, "rew": 711.050004196167, "rew_std": 264.62492121177553, "Agent": "qrdqn"}, {"env_step": 3700000, "rew": 801.9700037002564, "rew_std": 277.7941106570264, "Agent": "qrdqn"}, {"env_step": 3800000, "rew": 844.069990158081, "rew_std": 287.56283848408725, "Agent": "qrdqn"}, {"env_step": 3900000, "rew": 759.6999963760376, "rew_std": 279.6283181481931, "Agent": "qrdqn"}, {"env_step": 4000000, "rew": 583.4499931335449, "rew_std": 287.06731842027597, "Agent": "qrdqn"}, {"env_step": 4100000, "rew": 749.8800132751464, "rew_std": 264.1112287832134, "Agent": "qrdqn"}, {"env_step": 4200000, "rew": 793.8099964141845, "rew_std": 302.25798478353556, "Agent": "qrdqn"}, {"env_step": 4300000, "rew": 721.4700023651124, "rew_std": 280.748223525854, "Agent": "qrdqn"}, {"env_step": 4400000, "rew": 782.9300025939941, "rew_std": 341.9947531259377, "Agent": "qrdqn"}, {"env_step": 4500000, "rew": 861.8200061798095, "rew_std": 300.90433581636256, "Agent": "qrdqn"}, {"env_step": 4600000, "rew": 699.4100095748902, "rew_std": 358.53734373918525, "Agent": "qrdqn"}, {"env_step": 4700000, "rew": 763.3199853897095, "rew_std": 321.5669115617899, "Agent": "qrdqn"}, {"env_step": 4800000, "rew": 875.8500049591064, "rew_std": 322.19139996909126, "Agent": "qrdqn"}, {"env_step": 4900000, "rew": 798.370009803772, "rew_std": 320.01471740842925, "Agent": "qrdqn"}, {"env_step": 5000000, "rew": 916.0799865722656, "rew_std": 321.69201590196326, "Agent": "qrdqn"}, {"env_step": 5100000, "rew": 854.7900035858154, "rew_std": 282.8008511766018, "Agent": "qrdqn"}, {"env_step": 5200000, "rew": 778.5300037384034, "rew_std": 300.45106950066236, "Agent": "qrdqn"}, {"env_step": 5300000, "rew": 824.4299976348877, "rew_std": 302.3329753994756, "Agent": "qrdqn"}, 
{"env_step": 5400000, "rew": 888.6600048065186, "rew_std": 338.1730635095935, "Agent": "qrdqn"}, {"env_step": 5500000, "rew": 839.840009689331, "rew_std": 347.04786183662765, "Agent": "qrdqn"}, {"env_step": 5600000, "rew": 743.8000047683715, "rew_std": 372.26147487134074, "Agent": "qrdqn"}, {"env_step": 5700000, "rew": 867.8499877929687, "rew_std": 316.0468936380672, "Agent": "qrdqn"}, {"env_step": 5800000, "rew": 823.3300037384033, "rew_std": 323.5871449731984, "Agent": "qrdqn"}, {"env_step": 5900000, "rew": 840.5399921417236, "rew_std": 379.45472835781385, "Agent": "qrdqn"}, {"env_step": 6000000, "rew": 795.1999963760376, "rew_std": 305.4413360252164, "Agent": "qrdqn"}, {"env_step": 6100000, "rew": 837.2100086212158, "rew_std": 294.2474416713455, "Agent": "qrdqn"}, {"env_step": 6200000, "rew": 832.8199975967407, "rew_std": 315.05369700324695, "Agent": "qrdqn"}, {"env_step": 6300000, "rew": 758.1000026702881, "rew_std": 356.9235426531571, "Agent": "qrdqn"}, {"env_step": 6400000, "rew": 869.2500061035156, "rew_std": 302.04547033603006, "Agent": "qrdqn"}, {"env_step": 6500000, "rew": 785.2299983978271, "rew_std": 393.64854338638054, "Agent": "qrdqn"}, {"env_step": 6600000, "rew": 790.1000085830689, "rew_std": 365.117793324928, "Agent": "qrdqn"}, {"env_step": 6700000, "rew": 871.6399927139282, "rew_std": 307.94262149553043, "Agent": "qrdqn"}, {"env_step": 6800000, "rew": 769.5600109100342, "rew_std": 397.7108398542242, "Agent": "qrdqn"}, {"env_step": 6900000, "rew": 897.1599975585938, "rew_std": 326.12778476504053, "Agent": "qrdqn"}, {"env_step": 7000000, "rew": 826.01999874115, "rew_std": 303.25010067918225, "Agent": "qrdqn"}, {"env_step": 7100000, "rew": 899.7099914550781, "rew_std": 354.635686696347, "Agent": "qrdqn"}, {"env_step": 7200000, "rew": 839.5300022125244, "rew_std": 366.1003253391808, "Agent": "qrdqn"}, {"env_step": 7300000, "rew": 789.2700035095215, "rew_std": 325.517405634325, "Agent": "qrdqn"}, {"env_step": 7400000, "rew": 791.4800006866456, 
"rew_std": 308.0200619677834, "Agent": "qrdqn"}, {"env_step": 7500000, "rew": 753.9000038146972, "rew_std": 353.6681404822802, "Agent": "qrdqn"}, {"env_step": 7600000, "rew": 760.5899974822999, "rew_std": 373.76654972757194, "Agent": "qrdqn"}, {"env_step": 7700000, "rew": 895.6800060272217, "rew_std": 332.0611307126876, "Agent": "qrdqn"}, {"env_step": 7800000, "rew": 797.7900001525879, "rew_std": 337.71737028991475, "Agent": "qrdqn"}, {"env_step": 7900000, "rew": 863.1199945449829, "rew_std": 383.8510796441181, "Agent": "qrdqn"}, {"env_step": 8000000, "rew": 936.8600036621094, "rew_std": 335.0957512819982, "Agent": "qrdqn"}, {"env_step": 8100000, "rew": 873.0900012969971, "rew_std": 323.48754155145383, "Agent": "qrdqn"}, {"env_step": 8200000, "rew": 897.2599962234497, "rew_std": 430.1072599184845, "Agent": "qrdqn"}, {"env_step": 8300000, "rew": 925.4600048065186, "rew_std": 328.0253425434283, "Agent": "qrdqn"}, {"env_step": 8400000, "rew": 800.1099956512451, "rew_std": 369.4526365613206, "Agent": "qrdqn"}, {"env_step": 8500000, "rew": 764.4199901580811, "rew_std": 386.03387542422104, "Agent": "qrdqn"}, {"env_step": 8600000, "rew": 951.6600109100342, "rew_std": 333.5321850552157, "Agent": "qrdqn"}, {"env_step": 8700000, "rew": 746.5100072860718, "rew_std": 286.61728504504305, "Agent": "qrdqn"}, {"env_step": 8800000, "rew": 849.2099956512451, "rew_std": 376.9793778487278, "Agent": "qrdqn"}, {"env_step": 8900000, "rew": 804.9700115203857, "rew_std": 377.45671631618694, "Agent": "qrdqn"}, {"env_step": 9000000, "rew": 855.3400024414062, "rew_std": 335.099863589258, "Agent": "qrdqn"}, {"env_step": 9100000, "rew": 660.5900043487549, "rew_std": 356.5631885015981, "Agent": "qrdqn"}, {"env_step": 9200000, "rew": 878.2999950408936, "rew_std": 312.5758680927236, "Agent": "qrdqn"}, {"env_step": 9300000, "rew": 826.950011062622, "rew_std": 350.83283915590056, "Agent": "qrdqn"}, {"env_step": 9400000, "rew": 790.7599872589111, "rew_std": 401.66813659775227, "Agent": "qrdqn"}, 
{"env_step": 9500000, "rew": 849.3099939346314, "rew_std": 313.7882372406271, "Agent": "qrdqn"}, {"env_step": 9600000, "rew": 854.3600103378296, "rew_std": 367.7412676045596, "Agent": "qrdqn"}, {"env_step": 9700000, "rew": 803.1500019073486, "rew_std": 384.89482159028836, "Agent": "qrdqn"}, {"env_step": 9800000, "rew": 655.2900049209595, "rew_std": 378.5388990350636, "Agent": "qrdqn"}, {"env_step": 9900000, "rew": 778.3899938583374, "rew_std": 332.5220751161268, "Agent": "qrdqn"}, {"env_step": 10000000, "rew": 805.2999959945679, "rew_std": 376.93931456836924, "Agent": "qrdqn"}, {"env_step": 0, "rew": 0.020000000298023225, "rew_std": 0.06000000089406966, "Agent": "iqn"}, {"env_step": 100000, "rew": 1.2300000190734863, "rew_std": 3.52648555976251, "Agent": "iqn"}, {"env_step": 200000, "rew": 0.17000000029802323, "rew_std": 0.2193171198611889, "Agent": "iqn"}, {"env_step": 300000, "rew": 2.0799999237060547, "rew_std": 5.910634254899296, "Agent": "iqn"}, {"env_step": 400000, "rew": 0.04000000059604645, "rew_std": 0.08000000119209288, "Agent": "iqn"}, {"env_step": 500000, "rew": 3.5900000773370264, "rew_std": 10.53740503238059, "Agent": "iqn"}, {"env_step": 600000, "rew": 4.530000066757202, "rew_std": 9.554166769060705, "Agent": "iqn"}, {"env_step": 700000, "rew": 3.480000114440918, "rew_std": 6.673799742209458, "Agent": "iqn"}, {"env_step": 800000, "rew": 37.689999313652514, "rew_std": 42.65694315548521, "Agent": "iqn"}, {"env_step": 900000, "rew": 123.90999913215637, "rew_std": 116.8702046746665, "Agent": "iqn"}, {"env_step": 1000000, "rew": 214.27999999523163, "rew_std": 131.19766379013453, "Agent": "iqn"}, {"env_step": 1100000, "rew": 314.01000213623047, "rew_std": 130.61631856241644, "Agent": "iqn"}, {"env_step": 1200000, "rew": 447.6299987792969, "rew_std": 132.88649299306232, "Agent": "iqn"}, {"env_step": 1300000, "rew": 488.24000244140626, "rew_std": 144.3410893300805, "Agent": "iqn"}, {"env_step": 1400000, "rew": 562.2800018310547, "rew_std": 
126.35490427347918, "Agent": "iqn"}, {"env_step": 1500000, "rew": 503.0600067138672, "rew_std": 126.1369021393955, "Agent": "iqn"}, {"env_step": 1600000, "rew": 590.9699981689453, "rew_std": 106.22815802626981, "Agent": "iqn"}, {"env_step": 1700000, "rew": 656.8100067138672, "rew_std": 176.85540361563625, "Agent": "iqn"}, {"env_step": 1800000, "rew": 639.1500061035156, "rew_std": 159.90553612644817, "Agent": "iqn"}, {"env_step": 1900000, "rew": 654.3699951171875, "rew_std": 166.7387170420838, "Agent": "iqn"}, {"env_step": 2000000, "rew": 633.2700103759765, "rew_std": 194.57762929404822, "Agent": "iqn"}, {"env_step": 2100000, "rew": 700.1900115966797, "rew_std": 157.40320450762798, "Agent": "iqn"}, {"env_step": 2200000, "rew": 628.6800003051758, "rew_std": 207.51145605282667, "Agent": "iqn"}, {"env_step": 2300000, "rew": 684.490007019043, "rew_std": 224.21249061996116, "Agent": "iqn"}, {"env_step": 2400000, "rew": 756.5900001525879, "rew_std": 288.4319216808326, "Agent": "iqn"}, {"env_step": 2500000, "rew": 675.5500030517578, "rew_std": 244.21607763740568, "Agent": "iqn"}, {"env_step": 2600000, "rew": 779.7999938964844, "rew_std": 256.5563681757168, "Agent": "iqn"}, {"env_step": 2700000, "rew": 727.7399963378906, "rew_std": 269.84431788518737, "Agent": "iqn"}, {"env_step": 2800000, "rew": 792.0200012207031, "rew_std": 116.56821154467826, "Agent": "iqn"}, {"env_step": 2900000, "rew": 859.7300109863281, "rew_std": 185.18119041013455, "Agent": "iqn"}, {"env_step": 3000000, "rew": 899.7199981689453, "rew_std": 216.57974461966018, "Agent": "iqn"}, {"env_step": 3100000, "rew": 915.3699890136719, "rew_std": 114.51627047111124, "Agent": "iqn"}, {"env_step": 3200000, "rew": 795.7599945068359, "rew_std": 240.49648753795014, "Agent": "iqn"}, {"env_step": 3300000, "rew": 880.1699981689453, "rew_std": 189.51477550814192, "Agent": "iqn"}, {"env_step": 3400000, "rew": 945.1100036621094, "rew_std": 128.7564537028809, "Agent": "iqn"}, {"env_step": 3500000, "rew": 919.9100036621094, 
"rew_std": 244.82328680147785, "Agent": "iqn"}, {"env_step": 3600000, "rew": 982.8000061035157, "rew_std": 159.50998642265364, "Agent": "iqn"}, {"env_step": 3700000, "rew": 837.7000122070312, "rew_std": 188.50218141170882, "Agent": "iqn"}, {"env_step": 3800000, "rew": 1006.9300048828125, "rew_std": 142.2613367175323, "Agent": "iqn"}, {"env_step": 3900000, "rew": 913.0099868774414, "rew_std": 267.489982408041, "Agent": "iqn"}, {"env_step": 4000000, "rew": 874.3000061035157, "rew_std": 173.92007373390783, "Agent": "iqn"}, {"env_step": 4100000, "rew": 910.5500030517578, "rew_std": 194.92890230647552, "Agent": "iqn"}, {"env_step": 4200000, "rew": 983.5, "rew_std": 116.83092046232777, "Agent": "iqn"}, {"env_step": 4300000, "rew": 901.1400039672851, "rew_std": 305.8871257170003, "Agent": "iqn"}, {"env_step": 4400000, "rew": 813.9199890136719, "rew_std": 259.7093781051844, "Agent": "iqn"}, {"env_step": 4500000, "rew": 975.1299987792969, "rew_std": 249.7706832098956, "Agent": "iqn"}, {"env_step": 4600000, "rew": 964.7699890136719, "rew_std": 288.6829312458577, "Agent": "iqn"}, {"env_step": 4700000, "rew": 990.8800170898437, "rew_std": 227.1040665924821, "Agent": "iqn"}, {"env_step": 4800000, "rew": 1069.3499877929687, "rew_std": 184.13221489797237, "Agent": "iqn"}, {"env_step": 4900000, "rew": 985.4000122070313, "rew_std": 185.19558967958181, "Agent": "iqn"}, {"env_step": 5000000, "rew": 888.0499984741211, "rew_std": 383.0892119253023, "Agent": "iqn"}, {"env_step": 5100000, "rew": 1122.0600036621095, "rew_std": 252.77394487644332, "Agent": "iqn"}, {"env_step": 5200000, "rew": 972.6900054931641, "rew_std": 222.1775487183736, "Agent": "iqn"}, {"env_step": 5300000, "rew": 966.9400115966797, "rew_std": 369.08832261651287, "Agent": "iqn"}, {"env_step": 5400000, "rew": 789.2899993896484, "rew_std": 320.53568647830184, "Agent": "iqn"}, {"env_step": 5500000, "rew": 1027.3899841308594, "rew_std": 133.49564343614747, "Agent": "iqn"}, {"env_step": 5600000, "rew": 872.7399963378906, 
"rew_std": 283.1106543105209, "Agent": "iqn"}, {"env_step": 5700000, "rew": 1003.5799987792968, "rew_std": 303.12510600006334, "Agent": "iqn"}, {"env_step": 5800000, "rew": 898.3699935913086, "rew_std": 299.9163407129428, "Agent": "iqn"}, {"env_step": 5900000, "rew": 928.5400024414063, "rew_std": 183.30899650150636, "Agent": "iqn"}, {"env_step": 6000000, "rew": 1099.45, "rew_std": 215.64728660357196, "Agent": "iqn"}, {"env_step": 6100000, "rew": 1008.9999969482421, "rew_std": 270.1578310856458, "Agent": "iqn"}, {"env_step": 6200000, "rew": 1065.940008544922, "rew_std": 255.40183052553036, "Agent": "iqn"}, {"env_step": 6300000, "rew": 811.1000024795533, "rew_std": 373.20585603601734, "Agent": "iqn"}, {"env_step": 6400000, "rew": 940.3700012207031, "rew_std": 246.35399546539406, "Agent": "iqn"}, {"env_step": 6500000, "rew": 1068.6700012207032, "rew_std": 97.1969648010114, "Agent": "iqn"}, {"env_step": 6600000, "rew": 1245.320001220703, "rew_std": 287.68207875342046, "Agent": "iqn"}, {"env_step": 6700000, "rew": 1029.4099975585937, "rew_std": 181.585287367347, "Agent": "iqn"}, {"env_step": 6800000, "rew": 1042.259991455078, "rew_std": 164.0986750263718, "Agent": "iqn"}, {"env_step": 6900000, "rew": 838.8700035095214, "rew_std": 355.00632184818426, "Agent": "iqn"}, {"env_step": 7000000, "rew": 1098.1199951171875, "rew_std": 197.77309679595174, "Agent": "iqn"}, {"env_step": 7100000, "rew": 929.949984741211, "rew_std": 290.7787575853067, "Agent": "iqn"}, {"env_step": 7200000, "rew": 1002.5799926757812, "rew_std": 238.4512457320423, "Agent": "iqn"}, {"env_step": 7300000, "rew": 936.2500061035156, "rew_std": 200.61891005074025, "Agent": "iqn"}, {"env_step": 7400000, "rew": 1090.2499938964843, "rew_std": 137.09873398122215, "Agent": "iqn"}, {"env_step": 7500000, "rew": 1079.7300170898438, "rew_std": 129.4222508666326, "Agent": "iqn"}, {"env_step": 7600000, "rew": 968.8100051879883, "rew_std": 469.84580201774713, "Agent": "iqn"}, {"env_step": 7700000, "rew": 
1022.8900024414063, "rew_std": 251.64726234338931, "Agent": "iqn"}, {"env_step": 7800000, "rew": 1021.4299987792969, "rew_std": 243.66798220474894, "Agent": "iqn"}, {"env_step": 7900000, "rew": 1113.2900024414062, "rew_std": 199.72603151675196, "Agent": "iqn"}, {"env_step": 8000000, "rew": 1132.0199890136719, "rew_std": 263.47352587686873, "Agent": "iqn"}, {"env_step": 8100000, "rew": 1050.8499877929687, "rew_std": 191.7771377236277, "Agent": "iqn"}, {"env_step": 8200000, "rew": 1099.139990234375, "rew_std": 223.08476706246242, "Agent": "iqn"}, {"env_step": 8300000, "rew": 1095.5199951171876, "rew_std": 152.96869354522246, "Agent": "iqn"}, {"env_step": 8400000, "rew": 1059.9700012207031, "rew_std": 121.60402177574996, "Agent": "iqn"}, {"env_step": 8500000, "rew": 1119.9500122070312, "rew_std": 173.45015933529174, "Agent": "iqn"}, {"env_step": 8600000, "rew": 940.7099975585937, "rew_std": 184.9244564548404, "Agent": "iqn"}, {"env_step": 8700000, "rew": 930.7999961853027, "rew_std": 366.8234875758687, "Agent": "iqn"}, {"env_step": 8800000, "rew": 1097.2800170898438, "rew_std": 296.206762846177, "Agent": "iqn"}, {"env_step": 8900000, "rew": 1139.8899780273437, "rew_std": 255.91239554199356, "Agent": "iqn"}, {"env_step": 9000000, "rew": 1043.5500061035157, "rew_std": 173.89118497858877, "Agent": "iqn"}, {"env_step": 9100000, "rew": 929.5200164794921, "rew_std": 373.30865054928415, "Agent": "iqn"}, {"env_step": 9200000, "rew": 1205.3200134277345, "rew_std": 275.42816935716706, "Agent": "iqn"}, {"env_step": 9300000, "rew": 1150.1200012207032, "rew_std": 260.06818848705825, "Agent": "iqn"}, {"env_step": 9400000, "rew": 1100.200018310547, "rew_std": 185.52076935234098, "Agent": "iqn"}, {"env_step": 9500000, "rew": 1058.1600158691406, "rew_std": 311.87799312292907, "Agent": "iqn"}, {"env_step": 9600000, "rew": 1252.6800048828125, "rew_std": 118.09878836211058, "Agent": "iqn"}, {"env_step": 9700000, "rew": 1132.0099853515626, "rew_std": 200.64719895414822, "Agent": "iqn"}, 
{"env_step": 9800000, "rew": 1039.539990234375, "rew_std": 270.93414588943654, "Agent": "iqn"}, {"env_step": 9900000, "rew": 1111.9599914550781, "rew_std": 303.33757722581527, "Agent": "iqn"}, {"env_step": 10000000, "rew": 1095.0599853515625, "rew_std": 200.86304116683058, "Agent": "iqn"}, {"env_step": 0, "rew": 0.2100000001490116, "rew_std": 0.5974110812223167, "Agent": "rainbow"}, {"env_step": 100000, "rew": 0.12999999523162842, "rew_std": 0.38999998569488525, "Agent": "rainbow"}, {"env_step": 200000, "rew": 2.7599999859929083, "rew_std": 6.376864428026797, "Agent": "rainbow"}, {"env_step": 300000, "rew": 0.7399999916553497, "rew_std": 1.967841428665537, "Agent": "rainbow"}, {"env_step": 400000, "rew": 0.7299999989569187, "rew_std": 1.9344508264320834, "Agent": "rainbow"}, {"env_step": 500000, "rew": 2.250000037252903, "rew_std": 6.453410065907903, "Agent": "rainbow"}, {"env_step": 600000, "rew": 0.3300000071525574, "rew_std": 0.7043436813838512, "Agent": "rainbow"}, {"env_step": 700000, "rew": 11.450000222027302, "rew_std": 19.350775633693363, "Agent": "rainbow"}, {"env_step": 800000, "rew": 51.87000031471253, "rew_std": 41.72313644425024, "Agent": "rainbow"}, {"env_step": 900000, "rew": 134.95999908447266, "rew_std": 26.553200308986938, "Agent": "rainbow"}, {"env_step": 1000000, "rew": 273.26000061035154, "rew_std": 40.46751980619983, "Agent": "rainbow"}, {"env_step": 1100000, "rew": 394.9699981689453, "rew_std": 35.29900920102157, "Agent": "rainbow"}, {"env_step": 1200000, "rew": 475.4699951171875, "rew_std": 51.0648386902766, "Agent": "rainbow"}, {"env_step": 1300000, "rew": 537.4499969482422, "rew_std": 87.05237149623139, "Agent": "rainbow"}, {"env_step": 1400000, "rew": 528.3800109863281, "rew_std": 74.70653568610189, "Agent": "rainbow"}, {"env_step": 1500000, "rew": 602.5700042724609, "rew_std": 63.9815013284615, "Agent": "rainbow"}, {"env_step": 1600000, "rew": 672.6400024414063, "rew_std": 75.62920855186832, "Agent": "rainbow"}, {"env_step": 1700000, 
"rew": 670.960009765625, "rew_std": 59.331076612532364, "Agent": "rainbow"}, {"env_step": 1800000, "rew": 704.7300048828125, "rew_std": 67.48957648360094, "Agent": "rainbow"}, {"env_step": 1900000, "rew": 787.0799987792968, "rew_std": 112.42707564022125, "Agent": "rainbow"}, {"env_step": 2000000, "rew": 823.6899963378906, "rew_std": 77.87041479137376, "Agent": "rainbow"}, {"env_step": 2100000, "rew": 840.9600036621093, "rew_std": 68.54743565383826, "Agent": "rainbow"}, {"env_step": 2200000, "rew": 822.8200012207031, "rew_std": 101.75918406873306, "Agent": "rainbow"}, {"env_step": 2300000, "rew": 846.6400024414063, "rew_std": 56.10774301137517, "Agent": "rainbow"}, {"env_step": 2400000, "rew": 935.4899963378906, "rew_std": 81.81529716155883, "Agent": "rainbow"}, {"env_step": 2500000, "rew": 871.6499938964844, "rew_std": 105.67288003470732, "Agent": "rainbow"}, {"env_step": 2600000, "rew": 935.3400085449218, "rew_std": 93.00937834754181, "Agent": "rainbow"}, {"env_step": 2700000, "rew": 962.9700134277343, "rew_std": 60.47081018959421, "Agent": "rainbow"}, {"env_step": 2800000, "rew": 939.1, "rew_std": 76.38223487303658, "Agent": "rainbow"}, {"env_step": 2900000, "rew": 983.4500122070312, "rew_std": 67.66671891975996, "Agent": "rainbow"}, {"env_step": 3000000, "rew": 1005.25, "rew_std": 46.68377918260524, "Agent": "rainbow"}, {"env_step": 3100000, "rew": 1027.85, "rew_std": 100.20803948410037, "Agent": "rainbow"}, {"env_step": 3200000, "rew": 1019.7700134277344, "rew_std": 74.38652294354544, "Agent": "rainbow"}, {"env_step": 3300000, "rew": 1025.939990234375, "rew_std": 67.48715328162088, "Agent": "rainbow"}, {"env_step": 3400000, "rew": 1048.9500061035155, "rew_std": 63.75370223721345, "Agent": "rainbow"}, {"env_step": 3500000, "rew": 1024.8799865722656, "rew_std": 81.16544192265837, "Agent": "rainbow"}, {"env_step": 3600000, "rew": 1057.9299865722655, "rew_std": 79.86718486180474, "Agent": "rainbow"}, {"env_step": 3700000, "rew": 1059.6200012207032, "rew_std": 
100.3108670590954, "Agent": "rainbow"}, {"env_step": 3800000, "rew": 1100.7599975585938, "rew_std": 81.71528182411633, "Agent": "rainbow"}, {"env_step": 3900000, "rew": 1076.8199890136718, "rew_std": 65.37815728364572, "Agent": "rainbow"}, {"env_step": 4000000, "rew": 1191.0, "rew_std": 100.77746153732592, "Agent": "rainbow"}, {"env_step": 4100000, "rew": 1085.950018310547, "rew_std": 93.44744071480993, "Agent": "rainbow"}, {"env_step": 4200000, "rew": 1124.4300048828125, "rew_std": 98.69067996332086, "Agent": "rainbow"}, {"env_step": 4300000, "rew": 1204.660009765625, "rew_std": 81.11235149073514, "Agent": "rainbow"}, {"env_step": 4400000, "rew": 1136.6400024414063, "rew_std": 78.94447133335663, "Agent": "rainbow"}, {"env_step": 4500000, "rew": 1154.5200073242188, "rew_std": 74.92851522386881, "Agent": "rainbow"}, {"env_step": 4600000, "rew": 1206.0400085449219, "rew_std": 103.3929078135147, "Agent": "rainbow"}, {"env_step": 4700000, "rew": 1204.1800170898437, "rew_std": 82.69720477051608, "Agent": "rainbow"}, {"env_step": 4800000, "rew": 1142.9900146484374, "rew_std": 100.03756445774546, "Agent": "rainbow"}, {"env_step": 4900000, "rew": 1199.2599853515626, "rew_std": 43.66213881744983, "Agent": "rainbow"}, {"env_step": 5000000, "rew": 1174.4100036621094, "rew_std": 135.42985480511723, "Agent": "rainbow"}, {"env_step": 5100000, "rew": 1206.4500244140625, "rew_std": 65.40353311217433, "Agent": "rainbow"}, {"env_step": 5200000, "rew": 1213.0400024414062, "rew_std": 56.73084174140543, "Agent": "rainbow"}, {"env_step": 5300000, "rew": 1279.0799926757813, "rew_std": 122.6794261050074, "Agent": "rainbow"}, {"env_step": 5400000, "rew": 1260.5200073242188, "rew_std": 78.9505194850195, "Agent": "rainbow"}, {"env_step": 5500000, "rew": 1181.0700073242188, "rew_std": 114.83173344170228, "Agent": "rainbow"}, {"env_step": 5600000, "rew": 1176.05, "rew_std": 83.41206986065441, "Agent": "rainbow"}, {"env_step": 5700000, "rew": 1270.2599853515626, "rew_std": 124.96829424226486, 
"Agent": "rainbow"}, {"env_step": 5800000, "rew": 1261.5499755859375, "rew_std": 105.05167725943326, "Agent": "rainbow"}, {"env_step": 5900000, "rew": 1254.1099853515625, "rew_std": 103.52183258934855, "Agent": "rainbow"}, {"env_step": 6000000, "rew": 1285.210009765625, "rew_std": 135.08428673916382, "Agent": "rainbow"}, {"env_step": 6100000, "rew": 1321.8599975585937, "rew_std": 98.54867262115465, "Agent": "rainbow"}, {"env_step": 6200000, "rew": 1270.4499877929688, "rew_std": 134.84840828335157, "Agent": "rainbow"}, {"env_step": 6300000, "rew": 1291.8700073242187, "rew_std": 128.573159656795, "Agent": "rainbow"}, {"env_step": 6400000, "rew": 1372.1099975585937, "rew_std": 88.26145690808981, "Agent": "rainbow"}, {"env_step": 6500000, "rew": 1354.3300170898438, "rew_std": 76.23653593249794, "Agent": "rainbow"}, {"env_step": 6600000, "rew": 1337.8300048828125, "rew_std": 111.20302612183444, "Agent": "rainbow"}, {"env_step": 6700000, "rew": 1287.5800048828125, "rew_std": 156.0572077287139, "Agent": "rainbow"}, {"env_step": 6800000, "rew": 1319.3700073242187, "rew_std": 129.23841474784112, "Agent": "rainbow"}, {"env_step": 6900000, "rew": 1279.7999877929688, "rew_std": 117.75878546918071, "Agent": "rainbow"}, {"env_step": 7000000, "rew": 1328.610009765625, "rew_std": 100.9171629081728, "Agent": "rainbow"}, {"env_step": 7100000, "rew": 1364.7, "rew_std": 163.70892187079815, "Agent": "rainbow"}, {"env_step": 7200000, "rew": 1308.8900024414063, "rew_std": 88.94055366414823, "Agent": "rainbow"}, {"env_step": 7300000, "rew": 1322.25, "rew_std": 94.6752858690983, "Agent": "rainbow"}, {"env_step": 7400000, "rew": 1309.5300170898438, "rew_std": 130.62605278751548, "Agent": "rainbow"}, {"env_step": 7500000, "rew": 1346.460009765625, "rew_std": 117.62017984362635, "Agent": "rainbow"}, {"env_step": 7600000, "rew": 1307.6800170898437, "rew_std": 135.51715895844544, "Agent": "rainbow"}, {"env_step": 7700000, "rew": 1370.15, "rew_std": 121.40495533214569, "Agent": "rainbow"}, 
{"env_step": 7800000, "rew": 1366.02001953125, "rew_std": 155.8914434634503, "Agent": "rainbow"}, {"env_step": 7900000, "rew": 1383.0500122070312, "rew_std": 119.51592283983616, "Agent": "rainbow"}, {"env_step": 8000000, "rew": 1347.489990234375, "rew_std": 107.93389821410152, "Agent": "rainbow"}, {"env_step": 8100000, "rew": 1382.2799926757812, "rew_std": 81.35011185204777, "Agent": "rainbow"}, {"env_step": 8200000, "rew": 1357.7900024414062, "rew_std": 102.5295829744403, "Agent": "rainbow"}, {"env_step": 8300000, "rew": 1308.2999877929688, "rew_std": 108.36144846385606, "Agent": "rainbow"}, {"env_step": 8400000, "rew": 1368.2300048828124, "rew_std": 109.5325932250981, "Agent": "rainbow"}, {"env_step": 8500000, "rew": 1311.7599975585938, "rew_std": 113.36432112272027, "Agent": "rainbow"}, {"env_step": 8600000, "rew": 1384.7000122070312, "rew_std": 129.2364554734506, "Agent": "rainbow"}, {"env_step": 8700000, "rew": 1377.9300170898437, "rew_std": 130.36785628181076, "Agent": "rainbow"}, {"env_step": 8800000, "rew": 1420.160009765625, "rew_std": 126.74235879373302, "Agent": "rainbow"}, {"env_step": 8900000, "rew": 1345.9299926757812, "rew_std": 110.19251868290497, "Agent": "rainbow"}, {"env_step": 9000000, "rew": 1361.6299926757813, "rew_std": 146.04748540296958, "Agent": "rainbow"}, {"env_step": 9100000, "rew": 1334.089990234375, "rew_std": 85.54697028191892, "Agent": "rainbow"}, {"env_step": 9200000, "rew": 1292.2299926757812, "rew_std": 143.56093407787503, "Agent": "rainbow"}, {"env_step": 9300000, "rew": 1363.2300048828124, "rew_std": 162.6994478889228, "Agent": "rainbow"}, {"env_step": 9400000, "rew": 1438.1799926757812, "rew_std": 130.79961009153894, "Agent": "rainbow"}, {"env_step": 9500000, "rew": 1496.1199951171875, "rew_std": 112.32410367854669, "Agent": "rainbow"}, {"env_step": 9600000, "rew": 1472.02001953125, "rew_std": 126.8561263598282, "Agent": "rainbow"}, {"env_step": 9700000, "rew": 1391.2999877929688, "rew_std": 85.84510612212074, "Agent": 
"rainbow"}, {"env_step": 9800000, "rew": 1311.4199951171875, "rew_std": 129.5012392428379, "Agent": "rainbow"}, {"env_step": 9900000, "rew": 1416.0599975585938, "rew_std": 92.06283588597819, "Agent": "rainbow"}, {"env_step": 10000000, "rew": 1416.15, "rew_std": 73.5937659368437, "Agent": "rainbow"}, {"env_step": 0, "rew": 0.010000000149011612, "rew_std": 0.03000000044703483, "Agent": "ppo"}, {"env_step": 100000, "rew": 5.800000095367432, "rew_std": 9.745665904076512, "Agent": "ppo"}, {"env_step": 200000, "rew": 17.870000410079957, "rew_std": 24.34313324422257, "Agent": "ppo"}, {"env_step": 300000, "rew": 33.790000438690186, "rew_std": 34.83846926788018, "Agent": "ppo"}, {"env_step": 400000, "rew": 49.810000157356264, "rew_std": 45.02722450198439, "Agent": "ppo"}, {"env_step": 500000, "rew": 63.84000015258789, "rew_std": 55.360387449231766, "Agent": "ppo"}, {"env_step": 600000, "rew": 70.23000016212464, "rew_std": 58.73603829366222, "Agent": "ppo"}, {"env_step": 700000, "rew": 75.51999950408936, "rew_std": 67.29662320233204, "Agent": "ppo"}, {"env_step": 800000, "rew": 81.71000061035156, "rew_std": 56.81521740785272, "Agent": "ppo"}, {"env_step": 900000, "rew": 113.76000213623047, "rew_std": 79.55546964125922, "Agent": "ppo"}, {"env_step": 1000000, "rew": 116.16000061035156, "rew_std": 84.3558915187514, "Agent": "ppo"}, {"env_step": 1100000, "rew": 122.9199995458126, "rew_std": 82.33155847187241, "Agent": "ppo"}, {"env_step": 1200000, "rew": 150.4199990928173, "rew_std": 104.54179755010438, "Agent": "ppo"}, {"env_step": 1300000, "rew": 168.3199987411499, "rew_std": 108.28887045300536, "Agent": "ppo"}, {"env_step": 1400000, "rew": 176.67999801635742, "rew_std": 94.50735937346175, "Agent": "ppo"}, {"env_step": 1500000, "rew": 210.8900005340576, "rew_std": 108.90844271319088, "Agent": "ppo"}, {"env_step": 1600000, "rew": 211.0199996948242, "rew_std": 101.37994700391509, "Agent": "ppo"}, {"env_step": 1700000, "rew": 214.6699966430664, "rew_std": 100.4095196324463, 
"Agent": "ppo"}, {"env_step": 1800000, "rew": 247.6599998474121, "rew_std": 104.51677601247573, "Agent": "ppo"}, {"env_step": 1900000, "rew": 279.729997253418, "rew_std": 113.7720723989544, "Agent": "ppo"}, {"env_step": 2000000, "rew": 280.9099998474121, "rew_std": 106.76945620737145, "Agent": "ppo"}, {"env_step": 2100000, "rew": 288.3299987792969, "rew_std": 102.17095623218948, "Agent": "ppo"}, {"env_step": 2200000, "rew": 271.02000427246094, "rew_std": 118.16367469140917, "Agent": "ppo"}, {"env_step": 2300000, "rew": 269.90000305175784, "rew_std": 85.66159506214764, "Agent": "ppo"}, {"env_step": 2400000, "rew": 296.75999755859374, "rew_std": 95.57390780369239, "Agent": "ppo"}, {"env_step": 2500000, "rew": 300.6899978637695, "rew_std": 87.79065128890048, "Agent": "ppo"}, {"env_step": 2600000, "rew": 320.3000015258789, "rew_std": 91.139115860422, "Agent": "ppo"}, {"env_step": 2700000, "rew": 333.9300018310547, "rew_std": 89.31125875672396, "Agent": "ppo"}, {"env_step": 2800000, "rew": 327.020002746582, "rew_std": 120.26429493395794, "Agent": "ppo"}, {"env_step": 2900000, "rew": 361.1499954223633, "rew_std": 109.55767628353465, "Agent": "ppo"}, {"env_step": 3000000, "rew": 302.6900039672852, "rew_std": 98.16769631029796, "Agent": "ppo"}, {"env_step": 3100000, "rew": 315.95, "rew_std": 70.34762657993309, "Agent": "ppo"}, {"env_step": 3200000, "rew": 318.9499984741211, "rew_std": 115.132872934207, "Agent": "ppo"}, {"env_step": 3300000, "rew": 363.1899978637695, "rew_std": 85.86564602287181, "Agent": "ppo"}, {"env_step": 3400000, "rew": 368.1300018310547, "rew_std": 95.21212558703517, "Agent": "ppo"}, {"env_step": 3500000, "rew": 350.01000518798827, "rew_std": 93.16288451590842, "Agent": "ppo"}, {"env_step": 3600000, "rew": 388.5899993896484, "rew_std": 123.07756326920718, "Agent": "ppo"}, {"env_step": 3700000, "rew": 417.2999969482422, "rew_std": 100.34292919570994, "Agent": "ppo"}, {"env_step": 3800000, "rew": 461.9800048828125, "rew_std": 110.07789422926527, 
"Agent": "ppo"}, {"env_step": 3900000, "rew": 426.2000030517578, "rew_std": 88.50425199374192, "Agent": "ppo"}, {"env_step": 4000000, "rew": 449.4999969482422, "rew_std": 82.22106733939164, "Agent": "ppo"}, {"env_step": 4100000, "rew": 459.7000030517578, "rew_std": 107.17836845612624, "Agent": "ppo"}, {"env_step": 4200000, "rew": 465.42999572753905, "rew_std": 70.88704020848354, "Agent": "ppo"}, {"env_step": 4300000, "rew": 477.6600067138672, "rew_std": 132.27509463627436, "Agent": "ppo"}, {"env_step": 4400000, "rew": 410.0800048828125, "rew_std": 95.43147426617367, "Agent": "ppo"}, {"env_step": 4500000, "rew": 447.7100006103516, "rew_std": 70.30009378544847, "Agent": "ppo"}, {"env_step": 4600000, "rew": 462.73999786376953, "rew_std": 135.12483044104556, "Agent": "ppo"}, {"env_step": 4700000, "rew": 506.6499908447266, "rew_std": 102.90543516850502, "Agent": "ppo"}, {"env_step": 4800000, "rew": 504.0899993896484, "rew_std": 128.18980369342842, "Agent": "ppo"}, {"env_step": 4900000, "rew": 534.0000030517579, "rew_std": 116.97765592191337, "Agent": "ppo"}, {"env_step": 5000000, "rew": 513.1700057983398, "rew_std": 134.31567368090884, "Agent": "ppo"}, {"env_step": 5100000, "rew": 599.9900024414062, "rew_std": 114.45719219959994, "Agent": "ppo"}, {"env_step": 5200000, "rew": 602.4800048828125, "rew_std": 115.42681306636841, "Agent": "ppo"}, {"env_step": 5300000, "rew": 560.45, "rew_std": 147.17203595366996, "Agent": "ppo"}, {"env_step": 5400000, "rew": 542.2500030517579, "rew_std": 112.07122589244243, "Agent": "ppo"}, {"env_step": 5500000, "rew": 658.7099975585937, "rew_std": 119.60214061911367, "Agent": "ppo"}, {"env_step": 5600000, "rew": 624.2800048828125, "rew_std": 68.26668122502235, "Agent": "ppo"}, {"env_step": 5700000, "rew": 587.1499908447265, "rew_std": 79.0157166299757, "Agent": "ppo"}, {"env_step": 5800000, "rew": 649.9000061035156, "rew_std": 150.5787790102997, "Agent": "ppo"}, {"env_step": 5900000, "rew": 665.9100036621094, "rew_std": 119.23486176362321, 
"Agent": "ppo"}, {"env_step": 6000000, "rew": 706.7900024414063, "rew_std": 129.37492925213294, "Agent": "ppo"}, {"env_step": 6100000, "rew": 643.3500030517578, "rew_std": 116.35478540380245, "Agent": "ppo"}, {"env_step": 6200000, "rew": 721.2799926757813, "rew_std": 105.70935856689015, "Agent": "ppo"}, {"env_step": 6300000, "rew": 650.6100036621094, "rew_std": 148.55013326415474, "Agent": "ppo"}, {"env_step": 6400000, "rew": 768.1000030517578, "rew_std": 139.68046925775155, "Agent": "ppo"}, {"env_step": 6500000, "rew": 764.2999877929688, "rew_std": 118.32171828460929, "Agent": "ppo"}, {"env_step": 6600000, "rew": 782.5200073242188, "rew_std": 120.42663578622158, "Agent": "ppo"}, {"env_step": 6700000, "rew": 727.1499938964844, "rew_std": 130.45287574227336, "Agent": "ppo"}, {"env_step": 6800000, "rew": 783.6400024414063, "rew_std": 90.19995201850675, "Agent": "ppo"}, {"env_step": 6900000, "rew": 819.2, "rew_std": 132.53455239250277, "Agent": "ppo"}, {"env_step": 7000000, "rew": 794.2299987792969, "rew_std": 115.94847255070289, "Agent": "ppo"}, {"env_step": 7100000, "rew": 844.1199890136719, "rew_std": 122.43119457526758, "Agent": "ppo"}, {"env_step": 7200000, "rew": 889.6299987792969, "rew_std": 118.17887362331, "Agent": "ppo"}, {"env_step": 7300000, "rew": 861.8200012207031, "rew_std": 80.9622334545407, "Agent": "ppo"}, {"env_step": 7400000, "rew": 857.1600036621094, "rew_std": 95.34967659902838, "Agent": "ppo"}, {"env_step": 7500000, "rew": 892.6200073242187, "rew_std": 123.67628608131649, "Agent": "ppo"}, {"env_step": 7600000, "rew": 841.9999877929688, "rew_std": 86.62423578644787, "Agent": "ppo"}, {"env_step": 7700000, "rew": 890.85, "rew_std": 97.07490413687609, "Agent": "ppo"}, {"env_step": 7800000, "rew": 909.2900024414063, "rew_std": 86.85939542942558, "Agent": "ppo"}, {"env_step": 7900000, "rew": 906.0800048828125, "rew_std": 88.32103888799158, "Agent": "ppo"}, {"env_step": 8000000, "rew": 931.8700073242187, "rew_std": 80.47352350435204, "Agent": "ppo"}, 
{"env_step": 8100000, "rew": 938.1000061035156, "rew_std": 122.04440568291714, "Agent": "ppo"}, {"env_step": 8200000, "rew": 945.4200073242188, "rew_std": 66.3337244036705, "Agent": "ppo"}, {"env_step": 8300000, "rew": 971.860009765625, "rew_std": 91.97996093457455, "Agent": "ppo"}, {"env_step": 8400000, "rew": 1005.7700073242188, "rew_std": 129.41220377608226, "Agent": "ppo"}, {"env_step": 8500000, "rew": 972.6299926757813, "rew_std": 93.78562067872805, "Agent": "ppo"}, {"env_step": 8600000, "rew": 1000.3700012207031, "rew_std": 41.05399464776146, "Agent": "ppo"}, {"env_step": 8700000, "rew": 992.8999938964844, "rew_std": 132.0640325185449, "Agent": "ppo"}, {"env_step": 8800000, "rew": 1002.5400024414063, "rew_std": 93.22239772526292, "Agent": "ppo"}, {"env_step": 8900000, "rew": 979.8400085449218, "rew_std": 81.5561815195113, "Agent": "ppo"}, {"env_step": 9000000, "rew": 999.1100036621094, "rew_std": 134.88361081259976, "Agent": "ppo"}, {"env_step": 9100000, "rew": 1019.3699890136719, "rew_std": 110.85209895729604, "Agent": "ppo"}, {"env_step": 9200000, "rew": 1059.209979248047, "rew_std": 97.60941217671626, "Agent": "ppo"}, {"env_step": 9300000, "rew": 1088.9700012207031, "rew_std": 108.06601150231093, "Agent": "ppo"}, {"env_step": 9400000, "rew": 1053.210009765625, "rew_std": 92.88492116582314, "Agent": "ppo"}, {"env_step": 9500000, "rew": 1086.1199890136718, "rew_std": 105.10582835713497, "Agent": "ppo"}, {"env_step": 9600000, "rew": 1088.6500183105468, "rew_std": 111.11191848457115, "Agent": "ppo"}, {"env_step": 9700000, "rew": 1098.8700073242187, "rew_std": 110.51410372623518, "Agent": "ppo"}, {"env_step": 9800000, "rew": 1096.7400024414062, "rew_std": 105.4631692708768, "Agent": "ppo"}, {"env_step": 9900000, "rew": 1075.4200134277344, "rew_std": 89.31879559577925, "Agent": "ppo"}, {"env_step": 10000000, "rew": 1081.520001220703, "rew_std": 84.75678447189097, "Agent": "ppo"}]
examples/atari/benchmark/MsPacmanNoFrameskip-v4/result.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"env_step": 0, "rew": 232.0, "rew_std": 98.97373388935065, "Agent": "c51"}, {"env_step": 100000, "rew": 471.9, "rew_std": 188.48206811259263, "Agent": "c51"}, {"env_step": 200000, "rew": 674.3, "rew_std": 146.9837065800152, "Agent": "c51"}, {"env_step": 300000, "rew": 971.9, "rew_std": 232.55900326583787, "Agent": "c51"}, {"env_step": 400000, "rew": 1213.2, "rew_std": 339.9396417012879, "Agent": "c51"}, {"env_step": 500000, "rew": 1105.6, "rew_std": 205.68480741172888, "Agent": "c51"}, {"env_step": 600000, "rew": 1321.9, "rew_std": 232.04717192846803, "Agent": "c51"}, {"env_step": 700000, "rew": 1380.7, "rew_std": 526.1245194818429, "Agent": "c51"}, {"env_step": 800000, "rew": 1383.7, "rew_std": 241.8123445980374, "Agent": "c51"}, {"env_step": 900000, "rew": 1527.4, "rew_std": 273.0213178489914, "Agent": "c51"}, {"env_step": 1000000, "rew": 1433.2, "rew_std": 211.11788176277253, "Agent": "c51"}, {"env_step": 1100000, "rew": 1540.9, "rew_std": 213.74632160577642, "Agent": "c51"}, {"env_step": 1200000, "rew": 1537.3, "rew_std": 244.24375119949335, "Agent": "c51"}, {"env_step": 1300000, "rew": 1540.3, "rew_std": 281.00250888559697, "Agent": "c51"}, {"env_step": 1400000, "rew": 1688.0, "rew_std": 280.55480747975076, "Agent": "c51"}, {"env_step": 1500000, "rew": 1538.1, "rew_std": 241.28466590316094, "Agent": "c51"}, {"env_step": 1600000, "rew": 1563.0, "rew_std": 274.25572008619986, "Agent": "c51"}, {"env_step": 1700000, "rew": 1590.1, "rew_std": 294.4925975300568, "Agent": "c51"}, {"env_step": 1800000, "rew": 1855.9, "rew_std": 304.23755520974066, "Agent": "c51"}, {"env_step": 1900000, "rew": 1742.9, "rew_std": 189.30052826128087, "Agent": "c51"}, {"env_step": 2000000, "rew": 1842.5, "rew_std": 387.4669663339057, "Agent": "c51"}, {"env_step": 2100000, "rew": 1696.0, "rew_std": 306.7631007797385, "Agent": "c51"}, {"env_step": 2200000, "rew": 1946.0, "rew_std": 377.3793847045702, "Agent": "c51"}, {"env_step": 2300000, "rew": 1633.2, "rew_std": 193.38914137045026, 
"Agent": "c51"}, {"env_step": 2400000, "rew": 1732.7, "rew_std": 415.1855127530343, "Agent": "c51"}, {"env_step": 2500000, "rew": 2071.9, "rew_std": 396.92983007075696, "Agent": "c51"}, {"env_step": 2600000, "rew": 1844.4, "rew_std": 247.90207744188027, "Agent": "c51"}, {"env_step": 2700000, "rew": 1785.0, "rew_std": 315.5059428917306, "Agent": "c51"}, {"env_step": 2800000, "rew": 2009.1, "rew_std": 355.064627920045, "Agent": "c51"}, {"env_step": 2900000, "rew": 1977.9, "rew_std": 321.0471772185515, "Agent": "c51"}, {"env_step": 3000000, "rew": 1903.5, "rew_std": 249.7784017884653, "Agent": "c51"}, {"env_step": 3100000, "rew": 1831.5, "rew_std": 293.79082695005985, "Agent": "c51"}, {"env_step": 3200000, "rew": 2088.6, "rew_std": 283.78238141223636, "Agent": "c51"}, {"env_step": 3300000, "rew": 2027.6, "rew_std": 295.14003455986784, "Agent": "c51"}, {"env_step": 3400000, "rew": 2003.4, "rew_std": 174.97668416106185, "Agent": "c51"}, {"env_step": 3500000, "rew": 2107.5, "rew_std": 332.1888769962053, "Agent": "c51"}, {"env_step": 3600000, "rew": 1979.6, "rew_std": 317.34372532003846, "Agent": "c51"}, {"env_step": 3700000, "rew": 1993.1, "rew_std": 282.6320753205481, "Agent": "c51"}, {"env_step": 3800000, "rew": 1860.6, "rew_std": 249.90246097227612, "Agent": "c51"}, {"env_step": 3900000, "rew": 2034.8, "rew_std": 301.3057583253264, "Agent": "c51"}, {"env_step": 4000000, "rew": 2045.1, "rew_std": 451.0275933909144, "Agent": "c51"}, {"env_step": 4100000, "rew": 2082.3, "rew_std": 434.06291018699125, "Agent": "c51"}, {"env_step": 4200000, "rew": 2143.4, "rew_std": 325.0452891521426, "Agent": "c51"}, {"env_step": 4300000, "rew": 2178.5, "rew_std": 244.46441458829955, "Agent": "c51"}, {"env_step": 4400000, "rew": 2125.9, "rew_std": 343.1540324693854, "Agent": "c51"}, {"env_step": 4500000, "rew": 1984.6, "rew_std": 215.0196270111173, "Agent": "c51"}, {"env_step": 4600000, "rew": 2114.5, "rew_std": 265.95535339601645, "Agent": "c51"}, {"env_step": 4700000, "rew": 1941.8, 
"rew_std": 327.6485312037886, "Agent": "c51"}, {"env_step": 4800000, "rew": 2072.2, "rew_std": 302.6386624342633, "Agent": "c51"}, {"env_step": 4900000, "rew": 1997.1, "rew_std": 483.2809638295306, "Agent": "c51"}, {"env_step": 5000000, "rew": 2116.6, "rew_std": 315.6698275096941, "Agent": "c51"}, {"env_step": 5100000, "rew": 2136.5, "rew_std": 296.60318609212544, "Agent": "c51"}, {"env_step": 5200000, "rew": 1947.6, "rew_std": 303.33222710420995, "Agent": "c51"}, {"env_step": 5300000, "rew": 2043.2, "rew_std": 348.6327007037636, "Agent": "c51"}, {"env_step": 5400000, "rew": 2003.5, "rew_std": 181.4779600943321, "Agent": "c51"}, {"env_step": 5500000, "rew": 2042.7, "rew_std": 349.1080205323275, "Agent": "c51"}, {"env_step": 5600000, "rew": 2124.6, "rew_std": 246.09843559031415, "Agent": "c51"}, {"env_step": 5700000, "rew": 1958.8, "rew_std": 204.2428946132521, "Agent": "c51"}, {"env_step": 5800000, "rew": 2104.0, "rew_std": 256.8260111437313, "Agent": "c51"}, {"env_step": 5900000, "rew": 2023.0, "rew_std": 217.54447821077878, "Agent": "c51"}, {"env_step": 6000000, "rew": 1912.1, "rew_std": 324.066490091154, "Agent": "c51"}, {"env_step": 6100000, "rew": 2112.9, "rew_std": 196.80114328936202, "Agent": "c51"}, {"env_step": 6200000, "rew": 2013.2, "rew_std": 245.47211654279596, "Agent": "c51"}, {"env_step": 6300000, "rew": 2207.9, "rew_std": 279.84583255785674, "Agent": "c51"}, {"env_step": 6400000, "rew": 2062.1, "rew_std": 293.391700632448, "Agent": "c51"}, {"env_step": 6500000, "rew": 2121.7, "rew_std": 276.0326248833641, "Agent": "c51"}, {"env_step": 6600000, "rew": 2086.0, "rew_std": 265.9657872734762, "Agent": "c51"}, {"env_step": 6700000, "rew": 1945.4, "rew_std": 268.4459722178748, "Agent": "c51"}, {"env_step": 6800000, "rew": 2025.5, "rew_std": 188.10648579993196, "Agent": "c51"}, {"env_step": 6900000, "rew": 2222.5, "rew_std": 192.25828980826807, "Agent": "c51"}, {"env_step": 7000000, "rew": 1962.0, "rew_std": 251.00756960697422, "Agent": "c51"}, {"env_step": 
7100000, "rew": 2028.0, "rew_std": 182.15323219751002, "Agent": "c51"}, {"env_step": 7200000, "rew": 2155.4, "rew_std": 353.8415464582982, "Agent": "c51"}, {"env_step": 7300000, "rew": 2094.3, "rew_std": 366.438821633298, "Agent": "c51"}, {"env_step": 7400000, "rew": 2234.5, "rew_std": 312.953111503944, "Agent": "c51"}, {"env_step": 7500000, "rew": 2193.4, "rew_std": 310.7629965102023, "Agent": "c51"}, {"env_step": 7600000, "rew": 2107.7, "rew_std": 326.7916920608601, "Agent": "c51"}, {"env_step": 7700000, "rew": 2056.6, "rew_std": 275.43790588806036, "Agent": "c51"}, {"env_step": 7800000, "rew": 2155.4, "rew_std": 317.36546756066576, "Agent": "c51"}, {"env_step": 7900000, "rew": 2004.5, "rew_std": 340.1814956754703, "Agent": "c51"}, {"env_step": 8000000, "rew": 2161.7, "rew_std": 239.97793648583612, "Agent": "c51"}, {"env_step": 8100000, "rew": 1823.4, "rew_std": 288.0486764420208, "Agent": "c51"}, {"env_step": 8200000, "rew": 2090.5, "rew_std": 331.0233375458594, "Agent": "c51"}, {"env_step": 8300000, "rew": 1968.3, "rew_std": 320.45063582399086, "Agent": "c51"}, {"env_step": 8400000, "rew": 2106.9, "rew_std": 403.024428540008, "Agent": "c51"}, {"env_step": 8500000, "rew": 2226.4, "rew_std": 361.9058441086577, "Agent": "c51"}, {"env_step": 8600000, "rew": 1958.6, "rew_std": 128.12353413795609, "Agent": "c51"}, {"env_step": 8700000, "rew": 1775.2, "rew_std": 258.49982591870344, "Agent": "c51"}, {"env_step": 8800000, "rew": 2074.7, "rew_std": 342.712138682014, "Agent": "c51"}, {"env_step": 8900000, "rew": 1892.9, "rew_std": 167.9615729862042, "Agent": "c51"}, {"env_step": 9000000, "rew": 2107.6, "rew_std": 225.7517220310844, "Agent": "c51"}, {"env_step": 9100000, "rew": 1941.5, "rew_std": 392.5196173441526, "Agent": "c51"}, {"env_step": 9200000, "rew": 2188.9, "rew_std": 298.97974847805324, "Agent": "c51"}, {"env_step": 9300000, "rew": 2027.5, "rew_std": 442.07267502074814, "Agent": "c51"}, {"env_step": 9400000, "rew": 2137.9, "rew_std": 365.06340545171054, 
"Agent": "c51"}, {"env_step": 9500000, "rew": 2254.9, "rew_std": 201.1842190630269, "Agent": "c51"}, {"env_step": 9600000, "rew": 1965.5, "rew_std": 197.30040547348096, "Agent": "c51"}, {"env_step": 9700000, "rew": 2237.9, "rew_std": 387.8250765486935, "Agent": "c51"}, {"env_step": 9800000, "rew": 2177.9, "rew_std": 237.57838706414353, "Agent": "c51"}, {"env_step": 9900000, "rew": 2144.3, "rew_std": 298.6901571863392, "Agent": "c51"}, {"env_step": 10000000, "rew": 2196.7, "rew_std": 370.68991084193266, "Agent": "c51"}, {"env_step": 0, "rew": 131.5, "rew_std": 68.65020029104068, "Agent": "dqn"}, {"env_step": 100000, "rew": 614.7, "rew_std": 204.22930739734684, "Agent": "dqn"}, {"env_step": 200000, "rew": 701.0, "rew_std": 207.84417239845817, "Agent": "dqn"}, {"env_step": 300000, "rew": 810.8, "rew_std": 271.50977882941896, "Agent": "dqn"}, {"env_step": 400000, "rew": 834.8, "rew_std": 215.7822050123689, "Agent": "dqn"}, {"env_step": 500000, "rew": 909.7, "rew_std": 287.6004346311041, "Agent": "dqn"}, {"env_step": 600000, "rew": 1064.1, "rew_std": 212.43608450543425, "Agent": "dqn"}, {"env_step": 700000, "rew": 1294.5, "rew_std": 225.1289630411867, "Agent": "dqn"}, {"env_step": 800000, "rew": 1285.4, "rew_std": 259.38820327840665, "Agent": "dqn"}, {"env_step": 900000, "rew": 1304.9, "rew_std": 298.1504485993607, "Agent": "dqn"}, {"env_step": 1000000, "rew": 1251.2, "rew_std": 232.5570037646684, "Agent": "dqn"}, {"env_step": 1100000, "rew": 1391.9, "rew_std": 193.24205028926804, "Agent": "dqn"}, {"env_step": 1200000, "rew": 1458.9, "rew_std": 220.16468835851038, "Agent": "dqn"}, {"env_step": 1300000, "rew": 1373.2, "rew_std": 253.95267275616533, "Agent": "dqn"}, {"env_step": 1400000, "rew": 1561.5, "rew_std": 382.30334814123717, "Agent": "dqn"}, {"env_step": 1500000, "rew": 1512.3, "rew_std": 150.26180486071635, "Agent": "dqn"}, {"env_step": 1600000, "rew": 1592.5, "rew_std": 247.11060276726292, "Agent": "dqn"}, {"env_step": 1700000, "rew": 1433.2, "rew_std": 
348.8689725384016, "Agent": "dqn"}, {"env_step": 1800000, "rew": 1603.8, "rew_std": 354.88640436060666, "Agent": "dqn"}, {"env_step": 1900000, "rew": 1709.6, "rew_std": 354.1762837909958, "Agent": "dqn"}, {"env_step": 2000000, "rew": 1364.8, "rew_std": 449.32745297833736, "Agent": "dqn"}, {"env_step": 2100000, "rew": 1460.0, "rew_std": 327.8566760034024, "Agent": "dqn"}, {"env_step": 2200000, "rew": 1597.0, "rew_std": 265.5627985995026, "Agent": "dqn"}, {"env_step": 2300000, "rew": 1751.1, "rew_std": 489.0773865146496, "Agent": "dqn"}, {"env_step": 2400000, "rew": 1606.6, "rew_std": 194.81129330713864, "Agent": "dqn"}, {"env_step": 2500000, "rew": 1659.0, "rew_std": 233.647169895122, "Agent": "dqn"}, {"env_step": 2600000, "rew": 1689.7, "rew_std": 161.25510844621326, "Agent": "dqn"}, {"env_step": 2700000, "rew": 1622.3, "rew_std": 451.706552974384, "Agent": "dqn"}, {"env_step": 2800000, "rew": 1908.7, "rew_std": 397.6817949064302, "Agent": "dqn"}, {"env_step": 2900000, "rew": 1810.7, "rew_std": 342.4213924391991, "Agent": "dqn"}, {"env_step": 3000000, "rew": 1618.5, "rew_std": 344.36412414768176, "Agent": "dqn"}, {"env_step": 3100000, "rew": 1750.4, "rew_std": 284.15425388334415, "Agent": "dqn"}, {"env_step": 3200000, "rew": 1895.3, "rew_std": 299.36167089325244, "Agent": "dqn"}, {"env_step": 3300000, "rew": 1750.6, "rew_std": 258.5123594724244, "Agent": "dqn"}, {"env_step": 3400000, "rew": 1768.8, "rew_std": 519.2761885547998, "Agent": "dqn"}, {"env_step": 3500000, "rew": 1923.8, "rew_std": 338.9373983496067, "Agent": "dqn"}, {"env_step": 3600000, "rew": 1848.1, "rew_std": 409.1123195407344, "Agent": "dqn"}, {"env_step": 3700000, "rew": 1954.5, "rew_std": 319.1367261848752, "Agent": "dqn"}, {"env_step": 3800000, "rew": 1761.1, "rew_std": 322.0672134819066, "Agent": "dqn"}, {"env_step": 3900000, "rew": 1843.7, "rew_std": 310.8404896405872, "Agent": "dqn"}, {"env_step": 4000000, "rew": 2049.1, "rew_std": 425.3754694384715, "Agent": "dqn"}, {"env_step": 4100000, 
"rew": 1596.5, "rew_std": 433.0591760949074, "Agent": "dqn"}, {"env_step": 4200000, "rew": 1982.2, "rew_std": 302.4684446351388, "Agent": "dqn"}, {"env_step": 4300000, "rew": 1967.3, "rew_std": 453.393217858406, "Agent": "dqn"}, {"env_step": 4400000, "rew": 1863.3, "rew_std": 420.0681016216299, "Agent": "dqn"}, {"env_step": 4500000, "rew": 2167.2, "rew_std": 428.3346355362825, "Agent": "dqn"}, {"env_step": 4600000, "rew": 1978.7, "rew_std": 467.56562106296906, "Agent": "dqn"}, {"env_step": 4700000, "rew": 2055.6, "rew_std": 222.23060095315407, "Agent": "dqn"}, {"env_step": 4800000, "rew": 1964.6, "rew_std": 371.6714140205028, "Agent": "dqn"}, {"env_step": 4900000, "rew": 1900.1, "rew_std": 293.1492623221147, "Agent": "dqn"}, {"env_step": 5000000, "rew": 1980.9, "rew_std": 444.9308822727413, "Agent": "dqn"}, {"env_step": 5100000, "rew": 1902.8, "rew_std": 498.980921478968, "Agent": "dqn"}, {"env_step": 5200000, "rew": 2109.0, "rew_std": 436.7092854520041, "Agent": "dqn"}, {"env_step": 5300000, "rew": 1968.1, "rew_std": 310.17428971466995, "Agent": "dqn"}, {"env_step": 5400000, "rew": 1976.0, "rew_std": 482.8349614516331, "Agent": "dqn"}, {"env_step": 5500000, "rew": 1849.4, "rew_std": 398.51930944434804, "Agent": "dqn"}, {"env_step": 5600000, "rew": 1880.3, "rew_std": 403.730863323576, "Agent": "dqn"}, {"env_step": 5700000, "rew": 2198.5, "rew_std": 510.2962374934779, "Agent": "dqn"}, {"env_step": 5800000, "rew": 2004.4, "rew_std": 301.12960664803455, "Agent": "dqn"}, {"env_step": 5900000, "rew": 2048.1, "rew_std": 450.4685227626898, "Agent": "dqn"}, {"env_step": 6000000, "rew": 2285.8, "rew_std": 433.295003432996, "Agent": "dqn"}, {"env_step": 6100000, "rew": 2123.8, "rew_std": 344.2341644869085, "Agent": "dqn"}, {"env_step": 6200000, "rew": 2220.9, "rew_std": 532.900825670218, "Agent": "dqn"}, {"env_step": 6300000, "rew": 2083.9, "rew_std": 434.93619072227136, "Agent": "dqn"}, {"env_step": 6400000, "rew": 2324.8, "rew_std": 359.76959293414444, "Agent": "dqn"}, 
{"env_step": 6500000, "rew": 1959.3, "rew_std": 218.1110038489576, "Agent": "dqn"}, {"env_step": 6600000, "rew": 2100.8, "rew_std": 346.50073592995443, "Agent": "dqn"}, {"env_step": 6700000, "rew": 1936.2, "rew_std": 392.79404272468287, "Agent": "dqn"}, {"env_step": 6800000, "rew": 2225.4, "rew_std": 382.2230239009681, "Agent": "dqn"}, {"env_step": 6900000, "rew": 2039.2, "rew_std": 316.61895079101, "Agent": "dqn"}, {"env_step": 7000000, "rew": 2102.3, "rew_std": 456.89212074624356, "Agent": "dqn"}, {"env_step": 7100000, "rew": 2108.4, "rew_std": 328.34195589354704, "Agent": "dqn"}, {"env_step": 7200000, "rew": 1930.3, "rew_std": 574.398824859522, "Agent": "dqn"}, {"env_step": 7300000, "rew": 1915.3, "rew_std": 206.36184240309544, "Agent": "dqn"}, {"env_step": 7400000, "rew": 2049.0, "rew_std": 228.25950144517535, "Agent": "dqn"}, {"env_step": 7500000, "rew": 1851.8, "rew_std": 267.78043244419484, "Agent": "dqn"}, {"env_step": 7600000, "rew": 1897.6, "rew_std": 284.4177209668905, "Agent": "dqn"}, {"env_step": 7700000, "rew": 2028.1, "rew_std": 348.8350469777943, "Agent": "dqn"}, {"env_step": 7800000, "rew": 1792.0, "rew_std": 491.49120032814426, "Agent": "dqn"}, {"env_step": 7900000, "rew": 1943.1, "rew_std": 397.7744209976303, "Agent": "dqn"}, {"env_step": 8000000, "rew": 1958.2, "rew_std": 320.44587686534527, "Agent": "dqn"}, {"env_step": 8100000, "rew": 1928.8, "rew_std": 440.1206198305187, "Agent": "dqn"}, {"env_step": 8200000, "rew": 1939.0, "rew_std": 313.2452713130719, "Agent": "dqn"}, {"env_step": 8300000, "rew": 1952.0, "rew_std": 200.15993605114886, "Agent": "dqn"}, {"env_step": 8400000, "rew": 2045.5, "rew_std": 214.5685205243304, "Agent": "dqn"}, {"env_step": 8500000, "rew": 1957.4, "rew_std": 295.4894921989613, "Agent": "dqn"}, {"env_step": 8600000, "rew": 1757.6, "rew_std": 440.1148032048002, "Agent": "dqn"}, {"env_step": 8700000, "rew": 1913.0, "rew_std": 399.30990471061443, "Agent": "dqn"}, {"env_step": 8800000, "rew": 1883.1, "rew_std": 
390.40759470071794, "Agent": "dqn"}, {"env_step": 8900000, "rew": 2099.2, "rew_std": 247.28234874329385, "Agent": "dqn"}, {"env_step": 9000000, "rew": 1986.4, "rew_std": 552.3334500100459, "Agent": "dqn"}, {"env_step": 9100000, "rew": 1933.1, "rew_std": 483.2095715111612, "Agent": "dqn"}, {"env_step": 9200000, "rew": 1978.8, "rew_std": 328.87955242003113, "Agent": "dqn"}, {"env_step": 9300000, "rew": 2072.5, "rew_std": 331.83045369585955, "Agent": "dqn"}, {"env_step": 9400000, "rew": 2054.7, "rew_std": 349.56088167871417, "Agent": "dqn"}, {"env_step": 9500000, "rew": 1718.4, "rew_std": 509.64207047691815, "Agent": "dqn"}, {"env_step": 9600000, "rew": 1724.2, "rew_std": 495.79507863632534, "Agent": "dqn"}, {"env_step": 9700000, "rew": 1770.2, "rew_std": 291.362591970898, "Agent": "dqn"}, {"env_step": 9800000, "rew": 1836.1, "rew_std": 500.204048364265, "Agent": "dqn"}, {"env_step": 9900000, "rew": 2061.6, "rew_std": 311.6386368857366, "Agent": "dqn"}, {"env_step": 10000000, "rew": 1889.7, "rew_std": 482.98779487684783, "Agent": "dqn"}, {"env_step": 0, "rew": 209.6, "rew_std": 147.36770338171112, "Agent": "fqf"}, {"env_step": 100000, "rew": 564.9, "rew_std": 161.8928349248354, "Agent": "fqf"}, {"env_step": 200000, "rew": 767.5, "rew_std": 204.61732575713134, "Agent": "fqf"}, {"env_step": 300000, "rew": 1075.9, "rew_std": 288.21535351191824, "Agent": "fqf"}, {"env_step": 400000, "rew": 1022.0, "rew_std": 265.35787156216037, "Agent": "fqf"}, {"env_step": 500000, "rew": 1071.1, "rew_std": 377.931329741264, "Agent": "fqf"}, {"env_step": 600000, "rew": 1151.7, "rew_std": 300.1049982922644, "Agent": "fqf"}, {"env_step": 700000, "rew": 1196.7, "rew_std": 103.24538730616491, "Agent": "fqf"}, {"env_step": 800000, "rew": 1514.1, "rew_std": 486.713149606624, "Agent": "fqf"}, {"env_step": 900000, "rew": 1626.9, "rew_std": 316.3809254680187, "Agent": "fqf"}, {"env_step": 1000000, "rew": 1582.2, "rew_std": 369.04926500400995, "Agent": "fqf"}, {"env_step": 1100000, "rew": 1562.1, 
"rew_std": 180.65572230073423, "Agent": "fqf"}, {"env_step": 1200000, "rew": 1479.4, "rew_std": 133.2525421896333, "Agent": "fqf"}, {"env_step": 1300000, "rew": 1791.1, "rew_std": 167.81623878516643, "Agent": "fqf"}, {"env_step": 1400000, "rew": 1891.5, "rew_std": 251.22788459882395, "Agent": "fqf"}, {"env_step": 1500000, "rew": 1736.2, "rew_std": 316.84216891064233, "Agent": "fqf"}, {"env_step": 1600000, "rew": 2012.1, "rew_std": 421.7516923498944, "Agent": "fqf"}, {"env_step": 1700000, "rew": 1790.0, "rew_std": 352.4060158396845, "Agent": "fqf"}, {"env_step": 1800000, "rew": 1927.9, "rew_std": 216.36147993577785, "Agent": "fqf"}, {"env_step": 1900000, "rew": 2056.4, "rew_std": 326.7314493586438, "Agent": "fqf"}, {"env_step": 2000000, "rew": 2039.8, "rew_std": 309.31789472967773, "Agent": "fqf"}, {"env_step": 2100000, "rew": 2133.9, "rew_std": 337.53442787366146, "Agent": "fqf"}, {"env_step": 2200000, "rew": 2087.1, "rew_std": 346.3381151418365, "Agent": "fqf"}, {"env_step": 2300000, "rew": 2096.6, "rew_std": 242.30361119884284, "Agent": "fqf"}, {"env_step": 2400000, "rew": 2233.8, "rew_std": 185.708804314712, "Agent": "fqf"}, {"env_step": 2500000, "rew": 1932.5, "rew_std": 284.03248053699775, "Agent": "fqf"}, {"env_step": 2600000, "rew": 1983.8, "rew_std": 354.9137359979182, "Agent": "fqf"}, {"env_step": 2700000, "rew": 2270.3, "rew_std": 424.8583410973592, "Agent": "fqf"}, {"env_step": 2800000, "rew": 2289.4, "rew_std": 383.8, "Agent": "fqf"}, {"env_step": 2900000, "rew": 2172.8, "rew_std": 362.5851072506978, "Agent": "fqf"}, {"env_step": 3000000, "rew": 2144.2, "rew_std": 308.21057736554076, "Agent": "fqf"}, {"env_step": 3100000, "rew": 2074.5, "rew_std": 176.32711079127907, "Agent": "fqf"}, {"env_step": 3200000, "rew": 2291.0, "rew_std": 323.1361941968123, "Agent": "fqf"}, {"env_step": 3300000, "rew": 2121.9, "rew_std": 152.47062012073016, "Agent": "fqf"}, {"env_step": 3400000, "rew": 2160.7, "rew_std": 297.12726229681454, "Agent": "fqf"}, {"env_step": 
3500000, "rew": 1965.6, "rew_std": 434.0376020577019, "Agent": "fqf"}, {"env_step": 3600000, "rew": 2088.7, "rew_std": 263.4023728063208, "Agent": "fqf"}, {"env_step": 3700000, "rew": 2288.5, "rew_std": 494.5837138442793, "Agent": "fqf"}, {"env_step": 3800000, "rew": 2077.7, "rew_std": 488.92699864090133, "Agent": "fqf"}, {"env_step": 3900000, "rew": 2320.0, "rew_std": 402.8208038321755, "Agent": "fqf"}, {"env_step": 4000000, "rew": 2162.4, "rew_std": 318.08275652729117, "Agent": "fqf"}, {"env_step": 4100000, "rew": 2152.5, "rew_std": 415.441271421124, "Agent": "fqf"}, {"env_step": 4200000, "rew": 2323.0, "rew_std": 441.1031625368379, "Agent": "fqf"}, {"env_step": 4300000, "rew": 2254.9, "rew_std": 369.04347982317745, "Agent": "fqf"}, {"env_step": 4400000, "rew": 2266.5, "rew_std": 468.3191753494619, "Agent": "fqf"}, {"env_step": 4500000, "rew": 2189.8, "rew_std": 463.9204242108769, "Agent": "fqf"}, {"env_step": 4600000, "rew": 2164.3, "rew_std": 272.80764285481445, "Agent": "fqf"}, {"env_step": 4700000, "rew": 2167.5, "rew_std": 151.73611962878186, "Agent": "fqf"}, {"env_step": 4800000, "rew": 2223.9, "rew_std": 334.81470995163875, "Agent": "fqf"}, {"env_step": 4900000, "rew": 2216.2, "rew_std": 302.2647184174825, "Agent": "fqf"}, {"env_step": 5000000, "rew": 2307.7, "rew_std": 463.2478926017905, "Agent": "fqf"}, {"env_step": 5100000, "rew": 2229.6, "rew_std": 334.5005829591333, "Agent": "fqf"}, {"env_step": 5200000, "rew": 2097.1, "rew_std": 257.26540770185176, "Agent": "fqf"}, {"env_step": 5300000, "rew": 2246.4, "rew_std": 323.79691165914477, "Agent": "fqf"}, {"env_step": 5400000, "rew": 2412.3, "rew_std": 271.80399187649914, "Agent": "fqf"}, {"env_step": 5500000, "rew": 2289.4, "rew_std": 326.94929270454156, "Agent": "fqf"}, {"env_step": 5600000, "rew": 2213.5, "rew_std": 387.96320701839755, "Agent": "fqf"}, {"env_step": 5700000, "rew": 2212.0, "rew_std": 371.89353315162657, "Agent": "fqf"}, {"env_step": 5800000, "rew": 2119.3, "rew_std": 379.4562030063549, 
"Agent": "fqf"}, {"env_step": 5900000, "rew": 2086.9, "rew_std": 480.4985848054081, "Agent": "fqf"}, {"env_step": 6000000, "rew": 2082.4, "rew_std": 193.79483997258544, "Agent": "fqf"}, {"env_step": 6100000, "rew": 2167.7, "rew_std": 307.71611917480044, "Agent": "fqf"}, {"env_step": 6200000, "rew": 2257.0, "rew_std": 330.79963724284823, "Agent": "fqf"}, {"env_step": 6300000, "rew": 2369.5, "rew_std": 511.182990718588, "Agent": "fqf"}, {"env_step": 6400000, "rew": 2233.4, "rew_std": 333.7532621563421, "Agent": "fqf"}, {"env_step": 6500000, "rew": 2179.4, "rew_std": 211.5184152739425, "Agent": "fqf"}, {"env_step": 6600000, "rew": 2365.5, "rew_std": 349.8263140474141, "Agent": "fqf"}, {"env_step": 6700000, "rew": 2255.2, "rew_std": 228.93309066187874, "Agent": "fqf"}, {"env_step": 6800000, "rew": 2354.2, "rew_std": 476.53138406614937, "Agent": "fqf"}, {"env_step": 6900000, "rew": 2295.9, "rew_std": 328.7827398146077, "Agent": "fqf"}, {"env_step": 7000000, "rew": 2294.0, "rew_std": 350.9675198647305, "Agent": "fqf"}, {"env_step": 7100000, "rew": 2279.9, "rew_std": 249.64392642321585, "Agent": "fqf"}, {"env_step": 7200000, "rew": 2247.8, "rew_std": 342.9891543474808, "Agent": "fqf"}, {"env_step": 7300000, "rew": 2430.6, "rew_std": 378.38821334708615, "Agent": "fqf"}, {"env_step": 7400000, "rew": 2458.8, "rew_std": 619.2716366829665, "Agent": "fqf"}, {"env_step": 7500000, "rew": 2126.3, "rew_std": 379.9394820231243, "Agent": "fqf"}, {"env_step": 7600000, "rew": 2294.9, "rew_std": 231.97821018362907, "Agent": "fqf"}, {"env_step": 7700000, "rew": 2384.8, "rew_std": 322.39317610644304, "Agent": "fqf"}, {"env_step": 7800000, "rew": 2327.4, "rew_std": 316.6234988120749, "Agent": "fqf"}, {"env_step": 7900000, "rew": 2369.9, "rew_std": 376.6129179940593, "Agent": "fqf"}, {"env_step": 8000000, "rew": 2459.9, "rew_std": 387.55888584833144, "Agent": "fqf"}, {"env_step": 8100000, "rew": 2432.6, "rew_std": 335.16748052279775, "Agent": "fqf"}, {"env_step": 8200000, "rew": 2405.6, 
"rew_std": 405.6466935647325, "Agent": "fqf"}, {"env_step": 8300000, "rew": 2475.6, "rew_std": 527.2349002105228, "Agent": "fqf"}, {"env_step": 8400000, "rew": 2371.0, "rew_std": 221.4050586594624, "Agent": "fqf"}, {"env_step": 8500000, "rew": 2384.4, "rew_std": 362.2496928915192, "Agent": "fqf"}, {"env_step": 8600000, "rew": 2201.5, "rew_std": 263.163162315701, "Agent": "fqf"}, {"env_step": 8700000, "rew": 2120.5, "rew_std": 442.54948875803706, "Agent": "fqf"}, {"env_step": 8800000, "rew": 2236.1, "rew_std": 213.28546598397182, "Agent": "fqf"}, {"env_step": 8900000, "rew": 2335.3, "rew_std": 308.52230065264325, "Agent": "fqf"}, {"env_step": 9000000, "rew": 2441.1, "rew_std": 316.18394962426544, "Agent": "fqf"}, {"env_step": 9100000, "rew": 2425.4, "rew_std": 412.7859493732799, "Agent": "fqf"}, {"env_step": 9200000, "rew": 2400.9, "rew_std": 393.80869721223786, "Agent": "fqf"}, {"env_step": 9300000, "rew": 2478.2, "rew_std": 338.00852060266175, "Agent": "fqf"}, {"env_step": 9400000, "rew": 2325.9, "rew_std": 250.8499352202428, "Agent": "fqf"}, {"env_step": 9500000, "rew": 2419.5, "rew_std": 283.8465254323188, "Agent": "fqf"}, {"env_step": 9600000, "rew": 2506.6, "rew_std": 402.5186206872919, "Agent": "fqf"}, {"env_step": 9700000, "rew": 2327.1, "rew_std": 375.1155688584519, "Agent": "fqf"}, {"env_step": 9800000, "rew": 2354.1, "rew_std": 263.84557983790444, "Agent": "fqf"}, {"env_step": 9900000, "rew": 2433.2, "rew_std": 550.9763697292291, "Agent": "fqf"}, {"env_step": 10000000, "rew": 2478.0, "rew_std": 184.68892765945662, "Agent": "fqf"}, {"env_step": 0, "rew": 147.9, "rew_std": 65.50030534279973, "Agent": "qrdqn"}, {"env_step": 100000, "rew": 658.8, "rew_std": 173.94355406280508, "Agent": "qrdqn"}, {"env_step": 200000, "rew": 901.9, "rew_std": 225.8364231030947, "Agent": "qrdqn"}, {"env_step": 300000, "rew": 947.9, "rew_std": 176.00366473457305, "Agent": "qrdqn"}, {"env_step": 400000, "rew": 984.4, "rew_std": 298.15405413980204, "Agent": "qrdqn"}, {"env_step": 
500000, "rew": 1047.1, "rew_std": 262.9248752020242, "Agent": "qrdqn"}, {"env_step": 600000, "rew": 1181.9, "rew_std": 363.3566980255077, "Agent": "qrdqn"}, {"env_step": 700000, "rew": 1294.7, "rew_std": 404.93630363305283, "Agent": "qrdqn"}, {"env_step": 800000, "rew": 1202.4, "rew_std": 548.8827197134193, "Agent": "qrdqn"}, {"env_step": 900000, "rew": 1376.5, "rew_std": 180.5803145417573, "Agent": "qrdqn"}, {"env_step": 1000000, "rew": 1537.5, "rew_std": 396.10762426391136, "Agent": "qrdqn"}, {"env_step": 1100000, "rew": 1516.6, "rew_std": 246.3169502896624, "Agent": "qrdqn"}, {"env_step": 1200000, "rew": 1514.0, "rew_std": 284.75779181613274, "Agent": "qrdqn"}, {"env_step": 1300000, "rew": 1366.4, "rew_std": 237.443551186382, "Agent": "qrdqn"}, {"env_step": 1400000, "rew": 1548.5, "rew_std": 338.3723540716647, "Agent": "qrdqn"}, {"env_step": 1500000, "rew": 1512.0, "rew_std": 236.26468208346333, "Agent": "qrdqn"}, {"env_step": 1600000, "rew": 1628.0, "rew_std": 213.3972820820359, "Agent": "qrdqn"}, {"env_step": 1700000, "rew": 1573.0, "rew_std": 335.270338682085, "Agent": "qrdqn"}, {"env_step": 1800000, "rew": 1587.9, "rew_std": 292.99059711874713, "Agent": "qrdqn"}, {"env_step": 1900000, "rew": 1870.7, "rew_std": 401.1979685890745, "Agent": "qrdqn"}, {"env_step": 2000000, "rew": 1629.4, "rew_std": 364.31859683524254, "Agent": "qrdqn"}, {"env_step": 2100000, "rew": 1756.7, "rew_std": 314.8085291093619, "Agent": "qrdqn"}, {"env_step": 2200000, "rew": 1695.5, "rew_std": 151.9527887207076, "Agent": "qrdqn"}, {"env_step": 2300000, "rew": 1751.0, "rew_std": 158.11894257172352, "Agent": "qrdqn"}, {"env_step": 2400000, "rew": 1726.3, "rew_std": 512.4833753401177, "Agent": "qrdqn"}, {"env_step": 2500000, "rew": 1908.5, "rew_std": 349.27532120091166, "Agent": "qrdqn"}, {"env_step": 2600000, "rew": 1737.8, "rew_std": 288.164813952016, "Agent": "qrdqn"}, {"env_step": 2700000, "rew": 1928.7, "rew_std": 390.23686396853896, "Agent": "qrdqn"}, {"env_step": 2800000, "rew": 
1859.6, "rew_std": 433.55949995358196, "Agent": "qrdqn"}, {"env_step": 2900000, "rew": 1784.5, "rew_std": 345.3847854205509, "Agent": "qrdqn"}, {"env_step": 3000000, "rew": 1677.1, "rew_std": 354.96688577950476, "Agent": "qrdqn"}, {"env_step": 3100000, "rew": 1964.8, "rew_std": 299.83221974964596, "Agent": "qrdqn"}, {"env_step": 3200000, "rew": 1798.1, "rew_std": 198.37159574898823, "Agent": "qrdqn"}, {"env_step": 3300000, "rew": 1783.1, "rew_std": 387.8730333498322, "Agent": "qrdqn"}, {"env_step": 3400000, "rew": 1856.4, "rew_std": 299.4779457656273, "Agent": "qrdqn"}, {"env_step": 3500000, "rew": 2008.8, "rew_std": 300.43894554468136, "Agent": "qrdqn"}, {"env_step": 3600000, "rew": 2021.3, "rew_std": 332.70619170673694, "Agent": "qrdqn"}, {"env_step": 3700000, "rew": 1957.0, "rew_std": 290.85013323015687, "Agent": "qrdqn"}, {"env_step": 3800000, "rew": 1850.0, "rew_std": 178.16116299575506, "Agent": "qrdqn"}, {"env_step": 3900000, "rew": 1878.4, "rew_std": 399.1170755555317, "Agent": "qrdqn"}, {"env_step": 4000000, "rew": 2028.5, "rew_std": 457.8126800340943, "Agent": "qrdqn"}, {"env_step": 4100000, "rew": 1910.1, "rew_std": 372.1944249985483, "Agent": "qrdqn"}, {"env_step": 4200000, "rew": 1774.8, "rew_std": 338.8928444213598, "Agent": "qrdqn"}, {"env_step": 4300000, "rew": 1837.0, "rew_std": 452.25545878408144, "Agent": "qrdqn"}, {"env_step": 4400000, "rew": 2068.4, "rew_std": 295.75131445185497, "Agent": "qrdqn"}, {"env_step": 4500000, "rew": 1876.6, "rew_std": 202.19851631503136, "Agent": "qrdqn"}, {"env_step": 4600000, "rew": 1891.1, "rew_std": 480.94541270293865, "Agent": "qrdqn"}, {"env_step": 4700000, "rew": 1814.4, "rew_std": 474.1063593751934, "Agent": "qrdqn"}, {"env_step": 4800000, "rew": 2115.4, "rew_std": 403.2094244930294, "Agent": "qrdqn"}, {"env_step": 4900000, "rew": 2100.4, "rew_std": 376.1237030552581, "Agent": "qrdqn"}, {"env_step": 5000000, "rew": 1879.5, "rew_std": 195.04525115982702, "Agent": "qrdqn"}, {"env_step": 5100000, "rew": 1979.9, 
"rew_std": 212.22226556136846, "Agent": "qrdqn"}, {"env_step": 5200000, "rew": 1924.1, "rew_std": 592.8174170855643, "Agent": "qrdqn"}, {"env_step": 5300000, "rew": 2100.4, "rew_std": 348.4285292567186, "Agent": "qrdqn"}, {"env_step": 5400000, "rew": 1820.9, "rew_std": 254.82796157407844, "Agent": "qrdqn"}, {"env_step": 5500000, "rew": 1936.0, "rew_std": 332.0704744478196, "Agent": "qrdqn"}, {"env_step": 5600000, "rew": 1994.6, "rew_std": 341.4827082005764, "Agent": "qrdqn"}, {"env_step": 5700000, "rew": 1998.0, "rew_std": 395.96893817571095, "Agent": "qrdqn"}, {"env_step": 5800000, "rew": 2020.0, "rew_std": 326.8748996175754, "Agent": "qrdqn"}, {"env_step": 5900000, "rew": 1926.1, "rew_std": 247.97235733040887, "Agent": "qrdqn"}, {"env_step": 6000000, "rew": 2020.8, "rew_std": 313.74059348449, "Agent": "qrdqn"}, {"env_step": 6100000, "rew": 1995.5, "rew_std": 454.48965884825145, "Agent": "qrdqn"}, {"env_step": 6200000, "rew": 2165.4, "rew_std": 601.1125019495103, "Agent": "qrdqn"}, {"env_step": 6300000, "rew": 2024.4, "rew_std": 388.68424202686685, "Agent": "qrdqn"}, {"env_step": 6400000, "rew": 1701.7, "rew_std": 395.4142258442405, "Agent": "qrdqn"}, {"env_step": 6500000, "rew": 1837.4, "rew_std": 457.4256660923171, "Agent": "qrdqn"}, {"env_step": 6600000, "rew": 2029.2, "rew_std": 292.2097876526384, "Agent": "qrdqn"}, {"env_step": 6700000, "rew": 2231.2, "rew_std": 258.9786863817175, "Agent": "qrdqn"}, {"env_step": 6800000, "rew": 1961.0, "rew_std": 310.98295773241335, "Agent": "qrdqn"}, {"env_step": 6900000, "rew": 2151.2, "rew_std": 517.7655453967558, "Agent": "qrdqn"}, {"env_step": 7000000, "rew": 2058.0, "rew_std": 143.03985458605584, "Agent": "qrdqn"}, {"env_step": 7100000, "rew": 1998.7, "rew_std": 416.9923380591063, "Agent": "qrdqn"}, {"env_step": 7200000, "rew": 2028.3, "rew_std": 338.8569167067422, "Agent": "qrdqn"}, {"env_step": 7300000, "rew": 1894.4, "rew_std": 426.75219976000125, "Agent": "qrdqn"}, {"env_step": 7400000, "rew": 2052.4, "rew_std": 
381.0210492873064, "Agent": "qrdqn"}, {"env_step": 7500000, "rew": 2102.5, "rew_std": 231.69905049438592, "Agent": "qrdqn"}, {"env_step": 7600000, "rew": 2112.5, "rew_std": 287.3075877870266, "Agent": "qrdqn"}, {"env_step": 7700000, "rew": 2069.4, "rew_std": 393.8327563827062, "Agent": "qrdqn"}, {"env_step": 7800000, "rew": 2143.4, "rew_std": 293.58174330158886, "Agent": "qrdqn"}, {"env_step": 7900000, "rew": 2049.1, "rew_std": 565.8735636164672, "Agent": "qrdqn"}, {"env_step": 8000000, "rew": 1993.5, "rew_std": 267.2022642119636, "Agent": "qrdqn"}, {"env_step": 8100000, "rew": 1891.7, "rew_std": 279.16018698947744, "Agent": "qrdqn"}, {"env_step": 8200000, "rew": 2026.0, "rew_std": 382.7017115195593, "Agent": "qrdqn"}, {"env_step": 8300000, "rew": 1835.2, "rew_std": 455.81439205009747, "Agent": "qrdqn"}, {"env_step": 8400000, "rew": 1992.0, "rew_std": 213.05257567088927, "Agent": "qrdqn"}, {"env_step": 8500000, "rew": 2126.1, "rew_std": 186.0109942987242, "Agent": "qrdqn"}, {"env_step": 8600000, "rew": 2112.9, "rew_std": 253.74532508008892, "Agent": "qrdqn"}, {"env_step": 8700000, "rew": 2143.2, "rew_std": 242.88507570453973, "Agent": "qrdqn"}, {"env_step": 8800000, "rew": 1859.2, "rew_std": 253.84436176523596, "Agent": "qrdqn"}, {"env_step": 8900000, "rew": 1909.5, "rew_std": 577.5562743144602, "Agent": "qrdqn"}, {"env_step": 9000000, "rew": 2166.2, "rew_std": 251.24044260429093, "Agent": "qrdqn"}, {"env_step": 9100000, "rew": 2221.9, "rew_std": 475.83493986885827, "Agent": "qrdqn"}, {"env_step": 9200000, "rew": 2060.8, "rew_std": 244.7904409898393, "Agent": "qrdqn"}, {"env_step": 9300000, "rew": 2114.2, "rew_std": 240.22980664355538, "Agent": "qrdqn"}, {"env_step": 9400000, "rew": 2168.0, "rew_std": 323.3663557020118, "Agent": "qrdqn"}, {"env_step": 9500000, "rew": 2012.4, "rew_std": 346.8542056830218, "Agent": "qrdqn"}, {"env_step": 9600000, "rew": 1901.9, "rew_std": 406.43263894525006, "Agent": "qrdqn"}, {"env_step": 9700000, "rew": 2197.4, "rew_std": 
377.0950012927777, "Agent": "qrdqn"}, {"env_step": 9800000, "rew": 2095.6, "rew_std": 300.4899998336051, "Agent": "qrdqn"}, {"env_step": 9900000, "rew": 2027.9, "rew_std": 359.36790340819255, "Agent": "qrdqn"}, {"env_step": 10000000, "rew": 2259.3, "rew_std": 269.21108818174633, "Agent": "qrdqn"}, {"env_step": 0, "rew": 203.5, "rew_std": 125.49442218680478, "Agent": "iqn"}, {"env_step": 100000, "rew": 497.4, "rew_std": 171.87914358641655, "Agent": "iqn"}, {"env_step": 200000, "rew": 719.6, "rew_std": 175.82332041000703, "Agent": "iqn"}, {"env_step": 300000, "rew": 808.8, "rew_std": 230.12640005005943, "Agent": "iqn"}, {"env_step": 400000, "rew": 841.3, "rew_std": 215.85182417575254, "Agent": "iqn"}, {"env_step": 500000, "rew": 917.6, "rew_std": 177.8264322309819, "Agent": "iqn"}, {"env_step": 600000, "rew": 896.3, "rew_std": 348.6686249148323, "Agent": "iqn"}, {"env_step": 700000, "rew": 1141.9, "rew_std": 364.5055417960062, "Agent": "iqn"}, {"env_step": 800000, "rew": 1323.0, "rew_std": 291.3722704719857, "Agent": "iqn"}, {"env_step": 900000, "rew": 1274.1, "rew_std": 234.52439105559998, "Agent": "iqn"}, {"env_step": 1000000, "rew": 1553.1, "rew_std": 408.0204529187232, "Agent": "iqn"}, {"env_step": 1100000, "rew": 1436.4, "rew_std": 345.38245467886753, "Agent": "iqn"}, {"env_step": 1200000, "rew": 1649.8, "rew_std": 456.99295399382254, "Agent": "iqn"}, {"env_step": 1300000, "rew": 1489.3, "rew_std": 167.85592036028996, "Agent": "iqn"}, {"env_step": 1400000, "rew": 1645.2, "rew_std": 115.38006760268429, "Agent": "iqn"}, {"env_step": 1500000, "rew": 1641.9, "rew_std": 186.58534240395198, "Agent": "iqn"}, {"env_step": 1600000, "rew": 1599.5, "rew_std": 387.8569968429086, "Agent": "iqn"}, {"env_step": 1700000, "rew": 1690.2, "rew_std": 161.43902873840636, "Agent": "iqn"}, {"env_step": 1800000, "rew": 1613.6, "rew_std": 344.5162405460735, "Agent": "iqn"}, {"env_step": 1900000, "rew": 1773.5, "rew_std": 319.91131583612355, "Agent": "iqn"}, {"env_step": 2000000, "rew": 
1738.7, "rew_std": 435.27900248001856, "Agent": "iqn"}, {"env_step": 2100000, "rew": 1719.0, "rew_std": 253.0829903411132, "Agent": "iqn"}, {"env_step": 2200000, "rew": 1831.4, "rew_std": 298.3257280222408, "Agent": "iqn"}, {"env_step": 2300000, "rew": 1982.3, "rew_std": 322.387980545181, "Agent": "iqn"}, {"env_step": 2400000, "rew": 1801.0, "rew_std": 123.33045041675636, "Agent": "iqn"}, {"env_step": 2500000, "rew": 1800.4, "rew_std": 263.40888367706964, "Agent": "iqn"}, {"env_step": 2600000, "rew": 1744.0, "rew_std": 312.34083946868043, "Agent": "iqn"}, {"env_step": 2700000, "rew": 2024.6, "rew_std": 493.24663202093933, "Agent": "iqn"}, {"env_step": 2800000, "rew": 1913.7, "rew_std": 184.7598711841941, "Agent": "iqn"}, {"env_step": 2900000, "rew": 1956.7, "rew_std": 347.0703242860156, "Agent": "iqn"}, {"env_step": 3000000, "rew": 1950.4, "rew_std": 288.921165718263, "Agent": "iqn"}, {"env_step": 3100000, "rew": 1983.4, "rew_std": 243.26002548713177, "Agent": "iqn"}, {"env_step": 3200000, "rew": 2040.2, "rew_std": 305.4157166879269, "Agent": "iqn"}, {"env_step": 3300000, "rew": 2148.3, "rew_std": 352.78890288669794, "Agent": "iqn"}, {"env_step": 3400000, "rew": 1893.3, "rew_std": 572.8401260386705, "Agent": "iqn"}, {"env_step": 3500000, "rew": 2011.7, "rew_std": 243.61323855652836, "Agent": "iqn"}, {"env_step": 3600000, "rew": 1999.7, "rew_std": 199.95301948207737, "Agent": "iqn"}, {"env_step": 3700000, "rew": 2145.6, "rew_std": 185.3392565000734, "Agent": "iqn"}, {"env_step": 3800000, "rew": 2101.3, "rew_std": 386.0235873622233, "Agent": "iqn"}, {"env_step": 3900000, "rew": 1885.6, "rew_std": 300.85850494875496, "Agent": "iqn"}, {"env_step": 4000000, "rew": 2040.6, "rew_std": 263.6210158541993, "Agent": "iqn"}, {"env_step": 4100000, "rew": 2034.7, "rew_std": 204.19894710796137, "Agent": "iqn"}, {"env_step": 4200000, "rew": 2011.4, "rew_std": 203.3834801551001, "Agent": "iqn"}, {"env_step": 4300000, "rew": 1990.5, "rew_std": 242.6170851362286, "Agent": "iqn"}, 
{"env_step": 4400000, "rew": 1978.7, "rew_std": 212.20275681526854, "Agent": "iqn"}, {"env_step": 4500000, "rew": 1977.7, "rew_std": 178.95030036297788, "Agent": "iqn"}, {"env_step": 4600000, "rew": 1849.0, "rew_std": 308.38774294708924, "Agent": "iqn"}, {"env_step": 4700000, "rew": 1953.0, "rew_std": 273.4209209259599, "Agent": "iqn"}, {"env_step": 4800000, "rew": 2019.8, "rew_std": 216.3579441573616, "Agent": "iqn"}, {"env_step": 4900000, "rew": 1956.9, "rew_std": 152.99048990051637, "Agent": "iqn"}, {"env_step": 5000000, "rew": 2045.9, "rew_std": 282.905090092066, "Agent": "iqn"}, {"env_step": 5100000, "rew": 1971.6, "rew_std": 380.651336527274, "Agent": "iqn"}, {"env_step": 5200000, "rew": 2039.4, "rew_std": 223.29630538815462, "Agent": "iqn"}, {"env_step": 5300000, "rew": 1975.8, "rew_std": 201.08694636897744, "Agent": "iqn"}, {"env_step": 5400000, "rew": 2064.5, "rew_std": 254.50432216369134, "Agent": "iqn"}, {"env_step": 5500000, "rew": 2134.7, "rew_std": 428.5522255221643, "Agent": "iqn"}, {"env_step": 5600000, "rew": 1948.5, "rew_std": 272.5880591662078, "Agent": "iqn"}, {"env_step": 5700000, "rew": 2002.2, "rew_std": 340.7244634598461, "Agent": "iqn"}, {"env_step": 5800000, "rew": 2045.1, "rew_std": 164.70364294696094, "Agent": "iqn"}, {"env_step": 5900000, "rew": 1886.4, "rew_std": 163.4730558838367, "Agent": "iqn"}, {"env_step": 6000000, "rew": 1919.7, "rew_std": 219.83268637761765, "Agent": "iqn"}, {"env_step": 6100000, "rew": 2004.6, "rew_std": 165.23207920981932, "Agent": "iqn"}, {"env_step": 6200000, "rew": 1947.4, "rew_std": 389.1902362598528, "Agent": "iqn"}, {"env_step": 6300000, "rew": 2121.2, "rew_std": 371.044148316612, "Agent": "iqn"}, {"env_step": 6400000, "rew": 2047.5, "rew_std": 190.7633350515764, "Agent": "iqn"}, {"env_step": 6500000, "rew": 2032.7, "rew_std": 139.8721201669582, "Agent": "iqn"}, {"env_step": 6600000, "rew": 2159.6, "rew_std": 173.13416762730577, "Agent": "iqn"}, {"env_step": 6700000, "rew": 1899.0, "rew_std": 
313.5557366721266, "Agent": "iqn"}, {"env_step": 6800000, "rew": 2104.5, "rew_std": 332.75554090052356, "Agent": "iqn"}, {"env_step": 6900000, "rew": 2212.5, "rew_std": 400.02956140765394, "Agent": "iqn"}, {"env_step": 7000000, "rew": 1910.5, "rew_std": 262.7969748684334, "Agent": "iqn"}, {"env_step": 7100000, "rew": 2110.5, "rew_std": 244.04640952081226, "Agent": "iqn"}, {"env_step": 7200000, "rew": 2069.4, "rew_std": 252.60095011697797, "Agent": "iqn"}, {"env_step": 7300000, "rew": 1997.3, "rew_std": 178.55478150976523, "Agent": "iqn"}, {"env_step": 7400000, "rew": 2102.1, "rew_std": 270.0775629333173, "Agent": "iqn"}, {"env_step": 7500000, "rew": 1930.6, "rew_std": 381.8453089930528, "Agent": "iqn"}, {"env_step": 7600000, "rew": 2114.2, "rew_std": 166.0757658419795, "Agent": "iqn"}, {"env_step": 7700000, "rew": 2000.9, "rew_std": 236.48909065747623, "Agent": "iqn"}, {"env_step": 7800000, "rew": 2138.6, "rew_std": 264.63189528097325, "Agent": "iqn"}, {"env_step": 7900000, "rew": 2128.6, "rew_std": 213.55570701809867, "Agent": "iqn"}, {"env_step": 8000000, "rew": 2109.3, "rew_std": 174.64652873733274, "Agent": "iqn"}, {"env_step": 8100000, "rew": 2009.0, "rew_std": 247.61986996200446, "Agent": "iqn"}, {"env_step": 8200000, "rew": 1983.7, "rew_std": 389.60288756630126, "Agent": "iqn"}, {"env_step": 8300000, "rew": 1994.3, "rew_std": 114.65692303563705, "Agent": "iqn"}, {"env_step": 8400000, "rew": 2095.6, "rew_std": 306.54630971518804, "Agent": "iqn"}, {"env_step": 8500000, "rew": 2008.5, "rew_std": 301.76555469436863, "Agent": "iqn"}, {"env_step": 8600000, "rew": 2129.8, "rew_std": 119.71365836862559, "Agent": "iqn"}, {"env_step": 8700000, "rew": 1975.8, "rew_std": 117.61700557317381, "Agent": "iqn"}, {"env_step": 8800000, "rew": 2123.2, "rew_std": 291.63051966486637, "Agent": "iqn"}, {"env_step": 8900000, "rew": 2044.2, "rew_std": 255.55109078225433, "Agent": "iqn"}, {"env_step": 9000000, "rew": 2228.6, "rew_std": 253.11902338623227, "Agent": "iqn"}, {"env_step": 
9100000, "rew": 2149.0, "rew_std": 178.49201662819544, "Agent": "iqn"}, {"env_step": 9200000, "rew": 2148.9, "rew_std": 300.1541104166325, "Agent": "iqn"}, {"env_step": 9300000, "rew": 2022.7, "rew_std": 154.7081445819838, "Agent": "iqn"}, {"env_step": 9400000, "rew": 2217.1, "rew_std": 328.33350423007397, "Agent": "iqn"}, {"env_step": 9500000, "rew": 1985.3, "rew_std": 223.17842637674457, "Agent": "iqn"}, {"env_step": 9600000, "rew": 2110.1, "rew_std": 211.15892119444067, "Agent": "iqn"}, {"env_step": 9700000, "rew": 2162.6, "rew_std": 227.4173256372522, "Agent": "iqn"}, {"env_step": 9800000, "rew": 2212.4, "rew_std": 328.63329107076174, "Agent": "iqn"}, {"env_step": 9900000, "rew": 2094.4, "rew_std": 378.75142243957316, "Agent": "iqn"}, {"env_step": 10000000, "rew": 2151.1, "rew_std": 407.2558041329798, "Agent": "iqn"}, {"env_step": 0, "rew": 218.6, "rew_std": 99.78997945685728, "Agent": "rainbow"}, {"env_step": 100000, "rew": 395.4, "rew_std": 217.94045058226342, "Agent": "rainbow"}, {"env_step": 200000, "rew": 716.4, "rew_std": 209.39923591073583, "Agent": "rainbow"}, {"env_step": 300000, "rew": 943.7, "rew_std": 255.72096120576424, "Agent": "rainbow"}, {"env_step": 400000, "rew": 1031.6, "rew_std": 220.23632761195415, "Agent": "rainbow"}, {"env_step": 500000, "rew": 1255.4, "rew_std": 227.50701088098361, "Agent": "rainbow"}, {"env_step": 600000, "rew": 1306.0, "rew_std": 232.00991358129505, "Agent": "rainbow"}, {"env_step": 700000, "rew": 1406.3, "rew_std": 257.8658759898254, "Agent": "rainbow"}, {"env_step": 800000, "rew": 1297.9, "rew_std": 324.35488280585514, "Agent": "rainbow"}, {"env_step": 900000, "rew": 1442.4, "rew_std": 252.78734145522398, "Agent": "rainbow"}, {"env_step": 1000000, "rew": 1444.5, "rew_std": 303.269269791715, "Agent": "rainbow"}, {"env_step": 1100000, "rew": 1614.9, "rew_std": 246.82117008068815, "Agent": "rainbow"}, {"env_step": 1200000, "rew": 1609.4, "rew_std": 298.84952735448655, "Agent": "rainbow"}, {"env_step": 1300000, "rew": 
1685.1, "rew_std": 399.2817175879707, "Agent": "rainbow"}, {"env_step": 1400000, "rew": 1548.6, "rew_std": 186.0033333034653, "Agent": "rainbow"}, {"env_step": 1500000, "rew": 1715.5, "rew_std": 250.8785562777337, "Agent": "rainbow"}, {"env_step": 1600000, "rew": 1737.4, "rew_std": 276.36541028138817, "Agent": "rainbow"}, {"env_step": 1700000, "rew": 2035.6, "rew_std": 429.08791639942507, "Agent": "rainbow"}, {"env_step": 1800000, "rew": 1743.6, "rew_std": 354.16470744556125, "Agent": "rainbow"}, {"env_step": 1900000, "rew": 1857.3, "rew_std": 287.29046277243526, "Agent": "rainbow"}, {"env_step": 2000000, "rew": 1836.8, "rew_std": 371.6750731485769, "Agent": "rainbow"}, {"env_step": 2100000, "rew": 1950.2, "rew_std": 312.3401351091467, "Agent": "rainbow"}, {"env_step": 2200000, "rew": 2048.7, "rew_std": 436.6781537929279, "Agent": "rainbow"}, {"env_step": 2300000, "rew": 1939.5, "rew_std": 278.96782968650706, "Agent": "rainbow"}, {"env_step": 2400000, "rew": 1835.0, "rew_std": 308.29239367846884, "Agent": "rainbow"}, {"env_step": 2500000, "rew": 1861.0, "rew_std": 219.7257381373425, "Agent": "rainbow"}, {"env_step": 2600000, "rew": 1996.7, "rew_std": 346.1849361251873, "Agent": "rainbow"}, {"env_step": 2700000, "rew": 2101.1, "rew_std": 340.95056826466794, "Agent": "rainbow"}, {"env_step": 2800000, "rew": 2038.1, "rew_std": 255.98728484047797, "Agent": "rainbow"}, {"env_step": 2900000, "rew": 1941.0, "rew_std": 302.7953103996163, "Agent": "rainbow"}, {"env_step": 3000000, "rew": 2099.1, "rew_std": 384.3590638972886, "Agent": "rainbow"}, {"env_step": 3100000, "rew": 2072.2, "rew_std": 272.6069698301934, "Agent": "rainbow"}, {"env_step": 3200000, "rew": 1995.5, "rew_std": 265.344021979015, "Agent": "rainbow"}, {"env_step": 3300000, "rew": 2059.7, "rew_std": 355.7518938811148, "Agent": "rainbow"}, {"env_step": 3400000, "rew": 1939.6, "rew_std": 301.1342557730688, "Agent": "rainbow"}, {"env_step": 3500000, "rew": 1921.4, "rew_std": 263.6744963017849, "Agent": 
"rainbow"}, {"env_step": 3600000, "rew": 2222.2, "rew_std": 170.95309298167143, "Agent": "rainbow"}, {"env_step": 3700000, "rew": 2048.7, "rew_std": 211.11515814834328, "Agent": "rainbow"}, {"env_step": 3800000, "rew": 2072.6, "rew_std": 327.48288504897477, "Agent": "rainbow"}, {"env_step": 3900000, "rew": 2167.4, "rew_std": 428.5650942389032, "Agent": "rainbow"}, {"env_step": 4000000, "rew": 2107.1, "rew_std": 285.2712568766787, "Agent": "rainbow"}, {"env_step": 4100000, "rew": 1802.2, "rew_std": 228.4192636359727, "Agent": "rainbow"}, {"env_step": 4200000, "rew": 1961.4, "rew_std": 254.83100282343983, "Agent": "rainbow"}, {"env_step": 4300000, "rew": 2048.1, "rew_std": 245.7040699703609, "Agent": "rainbow"}, {"env_step": 4400000, "rew": 2136.8, "rew_std": 292.6403253141986, "Agent": "rainbow"}, {"env_step": 4500000, "rew": 2099.8, "rew_std": 350.09107386507304, "Agent": "rainbow"}, {"env_step": 4600000, "rew": 2179.6, "rew_std": 253.28489887871328, "Agent": "rainbow"}, {"env_step": 4700000, "rew": 2250.3, "rew_std": 184.54974939023896, "Agent": "rainbow"}, {"env_step": 4800000, "rew": 1950.7, "rew_std": 262.8326653975871, "Agent": "rainbow"}, {"env_step": 4900000, "rew": 2161.1, "rew_std": 393.27940449507395, "Agent": "rainbow"}, {"env_step": 5000000, "rew": 2120.8, "rew_std": 218.70198901701832, "Agent": "rainbow"}, {"env_step": 5100000, "rew": 2207.4, "rew_std": 232.03973797606307, "Agent": "rainbow"}, {"env_step": 5200000, "rew": 2217.3, "rew_std": 359.2347561136032, "Agent": "rainbow"}, {"env_step": 5300000, "rew": 2141.2, "rew_std": 243.542521954586, "Agent": "rainbow"}, {"env_step": 5400000, "rew": 2160.4, "rew_std": 287.35072646506393, "Agent": "rainbow"}, {"env_step": 5500000, "rew": 2235.1, "rew_std": 212.19493396403223, "Agent": "rainbow"}, {"env_step": 5600000, "rew": 2280.4, "rew_std": 318.04597151984177, "Agent": "rainbow"}, {"env_step": 5700000, "rew": 2358.9, "rew_std": 310.13430961439917, "Agent": "rainbow"}, {"env_step": 5800000, "rew": 2267.6, 
"rew_std": 273.4484229246898, "Agent": "rainbow"}, {"env_step": 5900000, "rew": 2193.4, "rew_std": 181.35997353330202, "Agent": "rainbow"}, {"env_step": 6000000, "rew": 2366.9, "rew_std": 578.7907134707674, "Agent": "rainbow"}, {"env_step": 6100000, "rew": 2292.2, "rew_std": 293.46372859350095, "Agent": "rainbow"}, {"env_step": 6200000, "rew": 2048.0, "rew_std": 355.46139030842716, "Agent": "rainbow"}, {"env_step": 6300000, "rew": 2311.8, "rew_std": 276.04304012236935, "Agent": "rainbow"}, {"env_step": 6400000, "rew": 2211.3, "rew_std": 304.2528718023874, "Agent": "rainbow"}, {"env_step": 6500000, "rew": 2256.9, "rew_std": 187.56622830349818, "Agent": "rainbow"}, {"env_step": 6600000, "rew": 2262.1, "rew_std": 290.55342021735004, "Agent": "rainbow"}, {"env_step": 6700000, "rew": 2175.7, "rew_std": 346.9455432773276, "Agent": "rainbow"}, {"env_step": 6800000, "rew": 2179.1, "rew_std": 243.099341833745, "Agent": "rainbow"}, {"env_step": 6900000, "rew": 2338.0, "rew_std": 367.66288907095316, "Agent": "rainbow"}, {"env_step": 7000000, "rew": 2354.4, "rew_std": 258.2797707912875, "Agent": "rainbow"}, {"env_step": 7100000, "rew": 2320.4, "rew_std": 294.2781677257081, "Agent": "rainbow"}, {"env_step": 7200000, "rew": 2389.3, "rew_std": 247.6655204100886, "Agent": "rainbow"}, {"env_step": 7300000, "rew": 2187.6, "rew_std": 325.17201601613874, "Agent": "rainbow"}, {"env_step": 7400000, "rew": 2160.5, "rew_std": 205.99866504421817, "Agent": "rainbow"}, {"env_step": 7500000, "rew": 2400.5, "rew_std": 389.60268222896, "Agent": "rainbow"}, {"env_step": 7600000, "rew": 2228.4, "rew_std": 339.70051516004503, "Agent": "rainbow"}, {"env_step": 7700000, "rew": 2230.1, "rew_std": 383.68019234774164, "Agent": "rainbow"}, {"env_step": 7800000, "rew": 2358.0, "rew_std": 292.9624549323684, "Agent": "rainbow"}, {"env_step": 7900000, "rew": 2243.1, "rew_std": 245.06833740816052, "Agent": "rainbow"}, {"env_step": 8000000, "rew": 2271.8, "rew_std": 182.58466529257052, "Agent": "rainbow"}, 
{"env_step": 8100000, "rew": 2178.2, "rew_std": 284.79389038390553, "Agent": "rainbow"}, {"env_step": 8200000, "rew": 2151.4, "rew_std": 386.34937556569184, "Agent": "rainbow"}, {"env_step": 8300000, "rew": 2225.5, "rew_std": 272.64161457855255, "Agent": "rainbow"}, {"env_step": 8400000, "rew": 2378.0, "rew_std": 335.8109587252923, "Agent": "rainbow"}, {"env_step": 8500000, "rew": 2290.5, "rew_std": 365.62802135503784, "Agent": "rainbow"}, {"env_step": 8600000, "rew": 2313.5, "rew_std": 400.5274647262033, "Agent": "rainbow"}, {"env_step": 8700000, "rew": 2258.8, "rew_std": 245.23898548150945, "Agent": "rainbow"}, {"env_step": 8800000, "rew": 2345.8, "rew_std": 273.3780532522682, "Agent": "rainbow"}, {"env_step": 8900000, "rew": 2222.2, "rew_std": 320.44181999233496, "Agent": "rainbow"}, {"env_step": 9000000, "rew": 2361.9, "rew_std": 291.36212863033523, "Agent": "rainbow"}, {"env_step": 9100000, "rew": 2401.0, "rew_std": 308.2463300673667, "Agent": "rainbow"}, {"env_step": 9200000, "rew": 2296.9, "rew_std": 327.37759544599265, "Agent": "rainbow"}, {"env_step": 9300000, "rew": 2196.2, "rew_std": 414.1250535768151, "Agent": "rainbow"}, {"env_step": 9400000, "rew": 2343.8, "rew_std": 295.55195820701306, "Agent": "rainbow"}, {"env_step": 9500000, "rew": 2109.8, "rew_std": 314.5847421602008, "Agent": "rainbow"}, {"env_step": 9600000, "rew": 2524.2, "rew_std": 338.81463958925974, "Agent": "rainbow"}, {"env_step": 9700000, "rew": 2397.1, "rew_std": 202.06011481734836, "Agent": "rainbow"}, {"env_step": 9800000, "rew": 2485.8, "rew_std": 377.7405988241137, "Agent": "rainbow"}, {"env_step": 9900000, "rew": 2244.9, "rew_std": 120.4271148869722, "Agent": "rainbow"}, {"env_step": 10000000, "rew": 2214.0, "rew_std": 176.29690865128634, "Agent": "rainbow"}, {"env_step": 0, "rew": 370.7, "rew_std": 113.11502994739469, "Agent": "ppo"}, {"env_step": 100000, "rew": 505.9, "rew_std": 129.42986517801833, "Agent": "ppo"}, {"env_step": 200000, "rew": 421.8, "rew_std": 102.49858535609162, 
"Agent": "ppo"}, {"env_step": 300000, "rew": 479.5, "rew_std": 92.63719555340609, "Agent": "ppo"}, {"env_step": 400000, "rew": 508.4, "rew_std": 132.38595091625092, "Agent": "ppo"}, {"env_step": 500000, "rew": 560.6, "rew_std": 100.25088528287418, "Agent": "ppo"}, {"env_step": 600000, "rew": 664.6, "rew_std": 175.08866325379265, "Agent": "ppo"}, {"env_step": 700000, "rew": 588.6, "rew_std": 162.83746497658333, "Agent": "ppo"}, {"env_step": 800000, "rew": 610.4, "rew_std": 181.44982777616517, "Agent": "ppo"}, {"env_step": 900000, "rew": 633.7, "rew_std": 107.41233634922946, "Agent": "ppo"}, {"env_step": 1000000, "rew": 697.1, "rew_std": 94.16204118433288, "Agent": "ppo"}, {"env_step": 1100000, "rew": 631.9, "rew_std": 98.8275771229873, "Agent": "ppo"}, {"env_step": 1200000, "rew": 712.6, "rew_std": 130.972668904623, "Agent": "ppo"}, {"env_step": 1300000, "rew": 727.2, "rew_std": 129.8936488054747, "Agent": "ppo"}, {"env_step": 1400000, "rew": 664.1, "rew_std": 156.49054284524672, "Agent": "ppo"}, {"env_step": 1500000, "rew": 628.1, "rew_std": 184.3379776389011, "Agent": "ppo"}, {"env_step": 1600000, "rew": 641.9, "rew_std": 127.15065866915515, "Agent": "ppo"}, {"env_step": 1700000, "rew": 647.3, "rew_std": 92.44355034289846, "Agent": "ppo"}, {"env_step": 1800000, "rew": 647.3, "rew_std": 125.52294610946637, "Agent": "ppo"}, {"env_step": 1900000, "rew": 613.0, "rew_std": 117.30387887874808, "Agent": "ppo"}, {"env_step": 2000000, "rew": 757.2, "rew_std": 211.36262678155757, "Agent": "ppo"}, {"env_step": 2100000, "rew": 698.0, "rew_std": 88.34591105421914, "Agent": "ppo"}, {"env_step": 2200000, "rew": 756.7, "rew_std": 118.22609694986974, "Agent": "ppo"}, {"env_step": 2300000, "rew": 694.6, "rew_std": 142.86441124366837, "Agent": "ppo"}, {"env_step": 2400000, "rew": 795.3, "rew_std": 180.00836091693074, "Agent": "ppo"}, {"env_step": 2500000, "rew": 637.0, "rew_std": 111.93748255164576, "Agent": "ppo"}, {"env_step": 2600000, "rew": 731.4, "rew_std": 201.773239058107, 
"Agent": "ppo"}, {"env_step": 2700000, "rew": 709.3, "rew_std": 171.81679196167062, "Agent": "ppo"}, {"env_step": 2800000, "rew": 643.3, "rew_std": 124.15880959480886, "Agent": "ppo"}, {"env_step": 2900000, "rew": 841.8, "rew_std": 230.30275725661647, "Agent": "ppo"}, {"env_step": 3000000, "rew": 771.9, "rew_std": 201.02659028098745, "Agent": "ppo"}, {"env_step": 3100000, "rew": 803.4, "rew_std": 195.58128744846732, "Agent": "ppo"}, {"env_step": 3200000, "rew": 756.8, "rew_std": 186.79657384438292, "Agent": "ppo"}, {"env_step": 3300000, "rew": 761.7, "rew_std": 183.00986312218257, "Agent": "ppo"}, {"env_step": 3400000, "rew": 884.0, "rew_std": 177.51788642274894, "Agent": "ppo"}, {"env_step": 3500000, "rew": 882.3, "rew_std": 235.03235947417963, "Agent": "ppo"}, {"env_step": 3600000, "rew": 886.8, "rew_std": 165.33166665826604, "Agent": "ppo"}, {"env_step": 3700000, "rew": 887.6, "rew_std": 155.86545479996522, "Agent": "ppo"}, {"env_step": 3800000, "rew": 870.0, "rew_std": 140.03142504452347, "Agent": "ppo"}, {"env_step": 3900000, "rew": 963.2, "rew_std": 163.08267841803433, "Agent": "ppo"}, {"env_step": 4000000, "rew": 915.2, "rew_std": 198.6211469103932, "Agent": "ppo"}, {"env_step": 4100000, "rew": 954.3, "rew_std": 224.29135070260733, "Agent": "ppo"}, {"env_step": 4200000, "rew": 1005.9, "rew_std": 185.8673989703412, "Agent": "ppo"}, {"env_step": 4300000, "rew": 1021.8, "rew_std": 173.70768549491413, "Agent": "ppo"}, {"env_step": 4400000, "rew": 969.5, "rew_std": 176.3333490863257, "Agent": "ppo"}, {"env_step": 4500000, "rew": 1041.1, "rew_std": 177.89291722831462, "Agent": "ppo"}, {"env_step": 4600000, "rew": 977.5, "rew_std": 200.08660624839436, "Agent": "ppo"}, {"env_step": 4700000, "rew": 1033.2, "rew_std": 133.5520872169357, "Agent": "ppo"}, {"env_step": 4800000, "rew": 1085.6, "rew_std": 141.09018392503427, "Agent": "ppo"}, {"env_step": 4900000, "rew": 1077.5, "rew_std": 248.93543339589084, "Agent": "ppo"}, {"env_step": 5000000, "rew": 1067.3, "rew_std": 
158.23656341061002, "Agent": "ppo"}, {"env_step": 5100000, "rew": 1198.8, "rew_std": 166.84831434569546, "Agent": "ppo"}, {"env_step": 5200000, "rew": 1088.0, "rew_std": 144.770853420155, "Agent": "ppo"}, {"env_step": 5300000, "rew": 1108.4, "rew_std": 154.99238690980923, "Agent": "ppo"}, {"env_step": 5400000, "rew": 1203.5, "rew_std": 257.2929264476581, "Agent": "ppo"}, {"env_step": 5500000, "rew": 1092.1, "rew_std": 100.34286222746488, "Agent": "ppo"}, {"env_step": 5600000, "rew": 1198.8, "rew_std": 151.49838282965268, "Agent": "ppo"}, {"env_step": 5700000, "rew": 1137.5, "rew_std": 123.52024125624108, "Agent": "ppo"}, {"env_step": 5800000, "rew": 1118.2, "rew_std": 153.89463928285483, "Agent": "ppo"}, {"env_step": 5900000, "rew": 1187.0, "rew_std": 157.57855184002676, "Agent": "ppo"}, {"env_step": 6000000, "rew": 1200.2, "rew_std": 167.1201962660408, "Agent": "ppo"}, {"env_step": 6100000, "rew": 1207.1, "rew_std": 205.2556698364262, "Agent": "ppo"}, {"env_step": 6200000, "rew": 1304.3, "rew_std": 198.32904477156137, "Agent": "ppo"}, {"env_step": 6300000, "rew": 1280.2, "rew_std": 114.50310039470546, "Agent": "ppo"}, {"env_step": 6400000, "rew": 1224.8, "rew_std": 189.02105702804647, "Agent": "ppo"}, {"env_step": 6500000, "rew": 1325.9, "rew_std": 179.55859767774976, "Agent": "ppo"}, {"env_step": 6600000, "rew": 1417.9, "rew_std": 262.43606840524035, "Agent": "ppo"}, {"env_step": 6700000, "rew": 1329.9, "rew_std": 153.3286992053347, "Agent": "ppo"}, {"env_step": 6800000, "rew": 1324.8, "rew_std": 237.16230729186287, "Agent": "ppo"}, {"env_step": 6900000, "rew": 1362.0, "rew_std": 162.35947770302786, "Agent": "ppo"}, {"env_step": 7000000, "rew": 1291.7, "rew_std": 179.75597347515324, "Agent": "ppo"}, {"env_step": 7100000, "rew": 1315.4, "rew_std": 236.61200307676702, "Agent": "ppo"}, {"env_step": 7200000, "rew": 1400.3, "rew_std": 257.7530019223831, "Agent": "ppo"}, {"env_step": 7300000, "rew": 1361.2, "rew_std": 186.70286553773084, "Agent": "ppo"}, {"env_step": 
7400000, "rew": 1465.6, "rew_std": 229.4812410634037, "Agent": "ppo"}, {"env_step": 7500000, "rew": 1450.6, "rew_std": 163.0295678703713, "Agent": "ppo"}, {"env_step": 7600000, "rew": 1490.6, "rew_std": 267.5194198558303, "Agent": "ppo"}, {"env_step": 7700000, "rew": 1461.2, "rew_std": 199.32877363792716, "Agent": "ppo"}, {"env_step": 7800000, "rew": 1510.4, "rew_std": 212.96769708103622, "Agent": "ppo"}, {"env_step": 7900000, "rew": 1515.6, "rew_std": 344.78027785823247, "Agent": "ppo"}, {"env_step": 8000000, "rew": 1401.0, "rew_std": 341.4229049141255, "Agent": "ppo"}, {"env_step": 8100000, "rew": 1480.9, "rew_std": 253.02982037696665, "Agent": "ppo"}, {"env_step": 8200000, "rew": 1490.2, "rew_std": 273.54590108426044, "Agent": "ppo"}, {"env_step": 8300000, "rew": 1565.9, "rew_std": 238.09512804759362, "Agent": "ppo"}, {"env_step": 8400000, "rew": 1507.5, "rew_std": 310.9798224965729, "Agent": "ppo"}, {"env_step": 8500000, "rew": 1463.0, "rew_std": 203.8013738913455, "Agent": "ppo"}, {"env_step": 8600000, "rew": 1554.6, "rew_std": 261.8802016189846, "Agent": "ppo"}, {"env_step": 8700000, "rew": 1525.2, "rew_std": 198.0645349374794, "Agent": "ppo"}, {"env_step": 8800000, "rew": 1599.1, "rew_std": 190.6459808126046, "Agent": "ppo"}, {"env_step": 8900000, "rew": 1544.1, "rew_std": 207.58297136325993, "Agent": "ppo"}, {"env_step": 9000000, "rew": 1524.5, "rew_std": 192.14382633850093, "Agent": "ppo"}, {"env_step": 9100000, "rew": 1563.0, "rew_std": 273.20761336390314, "Agent": "ppo"}, {"env_step": 9200000, "rew": 1699.4, "rew_std": 248.01701554530484, "Agent": "ppo"}, {"env_step": 9300000, "rew": 1534.9, "rew_std": 245.79888120168488, "Agent": "ppo"}, {"env_step": 9400000, "rew": 1526.9, "rew_std": 157.2097007184989, "Agent": "ppo"}, {"env_step": 9500000, "rew": 1573.5, "rew_std": 227.85620465548, "Agent": "ppo"}, {"env_step": 9600000, "rew": 1482.6, "rew_std": 161.7721854955295, "Agent": "ppo"}, {"env_step": 9700000, "rew": 1633.9, "rew_std": 182.42667019928857, 
"Agent": "ppo"}, {"env_step": 9800000, "rew": 1514.0, "rew_std": 231.70110055845657, "Agent": "ppo"}, {"env_step": 9900000, "rew": 1624.4, "rew_std": 227.23696882329688, "Agent": "ppo"}, {"env_step": 10000000, "rew": 1531.6, "rew_std": 227.96455864892684, "Agent": "ppo"}]
examples/atari/benchmark/PongNoFrameskip-v4/result.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"env_step": 0, "rew": -20.979999923706053, "rew_std": 0.04000015258789063, "Agent": "c51"}, {"env_step": 100000, "rew": -20.869999885559082, "rew_std": 0.15524167570244413, "Agent": "c51"}, {"env_step": 200000, "rew": -20.560000038146974, "rew_std": 0.40298883737879937, "Agent": "c51"}, {"env_step": 300000, "rew": -18.95999994277954, "rew_std": 2.2632720366854833, "Agent": "c51"}, {"env_step": 400000, "rew": -16.210000228881835, "rew_std": 2.7750494802377017, "Agent": "c51"}, {"env_step": 500000, "rew": -15.040000057220459, "rew_std": 3.022648038181074, "Agent": "c51"}, {"env_step": 600000, "rew": -12.759999966621399, "rew_std": 5.666603835995492, "Agent": "c51"}, {"env_step": 700000, "rew": -8.17000013589859, "rew_std": 6.876634371998414, "Agent": "c51"}, {"env_step": 800000, "rew": -5.910000105202198, "rew_std": 5.658347880641881, "Agent": "c51"}, {"env_step": 900000, "rew": -2.0299999713897705, "rew_std": 7.5090678214603175, "Agent": "c51"}, {"env_step": 1000000, "rew": -1.05, "rew_std": 8.06576073888153, "Agent": "c51"}, {"env_step": 1100000, "rew": 5.750000011920929, "rew_std": 8.470448577145289, "Agent": "c51"}, {"env_step": 1200000, "rew": 11.85, "rew_std": 6.486486065226738, "Agent": "c51"}, {"env_step": 1300000, "rew": 11.839999842643739, "rew_std": 9.283232047765221, "Agent": "c51"}, {"env_step": 1400000, "rew": 10.289999675750732, "rew_std": 13.408239764024396, "Agent": "c51"}, {"env_step": 1500000, "rew": 15.300000054495674, "rew_std": 7.003264581462973, "Agent": "c51"}, {"env_step": 1600000, "rew": 15.419999885559083, "rew_std": 5.896914245313163, "Agent": "c51"}, {"env_step": 1700000, "rew": 16.47999973297119, "rew_std": 6.142116861407374, "Agent": "c51"}, {"env_step": 1800000, "rew": 18.700000381469728, "rew_std": 1.2743627474105064, "Agent": "c51"}, {"env_step": 1900000, "rew": 13.000000283122063, "rew_std": 8.335466556491935, "Agent": "c51"}, {"env_step": 2000000, "rew": 17.47499966621399, "rew_std": 3.660174086617874, "Agent": "c51"}, 
{"env_step": 2100000, "rew": 13.566666801770529, "rew_std": 8.47833851940157, "Agent": "c51"}, {"env_step": 2200000, "rew": 19.75, "rew_std": 0.5499992370605469, "Agent": "c51"}, {"env_step": 2300000, "rew": 14.0, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 2400000, "rew": 19.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 2500000, "rew": 15.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 2600000, "rew": 18.200000762939453, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 2700000, "rew": 19.0, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 2800000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 2900000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 3000000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 3100000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 3200000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 3300000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 3400000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 3500000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 3600000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 3700000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 3800000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 3900000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 4000000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 4100000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 4200000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 4300000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 4400000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 4500000, "rew": 
20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 4600000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 4700000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 4800000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 4900000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 5000000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 5100000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 5200000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 5300000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 5400000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 5500000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 5600000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 5700000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 5800000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 5900000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 6000000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 6100000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 6200000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 6300000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 6400000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 6500000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 6600000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 6700000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 6800000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 6900000, "rew": 20.600000381469727, "rew_std": 
0.0, "Agent": "c51"}, {"env_step": 7000000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 7100000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 7200000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 7300000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 7400000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 7500000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 7600000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 7700000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 7800000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 7900000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 8000000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 8100000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 8200000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 8300000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 8400000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 8500000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 8600000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 8700000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 8800000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 8900000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 9000000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 9100000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 9200000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 9300000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, 
{"env_step": 9400000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 9500000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 9600000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 9700000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 9800000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 9900000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 10000000, "rew": 20.600000381469727, "rew_std": 0.0, "Agent": "c51"}, {"env_step": 0, "rew": -20.979999923706053, "rew_std": 0.04000015258789063, "Agent": "dqn"}, {"env_step": 100000, "rew": -20.689999961853026, "rew_std": 0.5281095979692643, "Agent": "dqn"}, {"env_step": 200000, "rew": -18.38000020980835, "rew_std": 2.5906757312772744, "Agent": "dqn"}, {"env_step": 300000, "rew": -18.030000019073487, "rew_std": 1.7245580854624265, "Agent": "dqn"}, {"env_step": 400000, "rew": -13.899999952316284, "rew_std": 3.808936970212056, "Agent": "dqn"}, {"env_step": 500000, "rew": -5.709999942779541, "rew_std": 9.006936246078585, "Agent": "dqn"}, {"env_step": 600000, "rew": -1.0700000286102296, "rew_std": 8.908540906843577, "Agent": "dqn"}, {"env_step": 700000, "rew": 6.160000026226044, "rew_std": 7.3988107178341656, "Agent": "dqn"}, {"env_step": 800000, "rew": 15.04000015258789, "rew_std": 5.2547504042740645, "Agent": "dqn"}, {"env_step": 900000, "rew": 19.755555470784504, "rew_std": 0.4374447957623508, "Agent": "dqn"}, {"env_step": 1000000, "rew": 19.983332951863606, "rew_std": 0.5814256919850659, "Agent": "dqn"}, {"env_step": 1100000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 1200000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 1300000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 1400000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 1500000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 1600000, 
"rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 1700000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 1800000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 1900000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 2000000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 2100000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 2200000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 2300000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 2400000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 2500000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 2600000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 2700000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 2800000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 2900000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 3000000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 3100000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 3200000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 3300000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 3400000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 3500000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 3600000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 3700000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 3800000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 3900000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 4000000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 4100000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 4200000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 4300000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 4400000, "rew": 20.25, "rew_std": 0.25, "Agent": 
"dqn"}, {"env_step": 4500000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 4600000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 4700000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 4800000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 4900000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 5000000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 5100000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 5200000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 5300000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 5400000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 5500000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 5600000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 5700000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 5800000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 5900000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 6000000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 6100000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 6200000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 6300000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 6400000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 6500000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 6600000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 6700000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 6800000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 6900000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 7000000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 7100000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 7200000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 7300000, "rew": 
20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 7400000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 7500000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 7600000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 7700000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 7800000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 7900000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 8000000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 8100000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 8200000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 8300000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 8400000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 8500000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 8600000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 8700000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 8800000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 8900000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 9000000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 9100000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 9200000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 9300000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 9400000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 9500000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 9600000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 9700000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 9800000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 9900000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 10000000, "rew": 20.25, "rew_std": 0.25, "Agent": "dqn"}, {"env_step": 0, "rew": -20.979999923706053, "rew_std": 
0.04000015258789063, "Agent": "fqf"}, {"env_step": 100000, "rew": -20.879999923706055, "rew_std": 0.15999994277996732, "Agent": "fqf"}, {"env_step": 200000, "rew": -19.329999923706055, "rew_std": 1.1883183401767072, "Agent": "fqf"}, {"env_step": 300000, "rew": -18.410000228881835, "rew_std": 2.3947650818103883, "Agent": "fqf"}, {"env_step": 400000, "rew": -15.789999866485596, "rew_std": 1.8124292335112842, "Agent": "fqf"}, {"env_step": 500000, "rew": -12.899999952316284, "rew_std": 3.9191835397861126, "Agent": "fqf"}, {"env_step": 600000, "rew": -7.259999930858612, "rew_std": 6.181294202818166, "Agent": "fqf"}, {"env_step": 700000, "rew": -0.2800000667572021, "rew_std": 5.578135949422739, "Agent": "fqf"}, {"env_step": 800000, "rew": 5.889999827742576, "rew_std": 6.357428520171511, "Agent": "fqf"}, {"env_step": 900000, "rew": 12.8555555873447, "rew_std": 5.842585252592779, "Agent": "fqf"}, {"env_step": 1000000, "rew": 18.875, "rew_std": 2.6085196695608577, "Agent": "fqf"}, {"env_step": 1100000, "rew": 18.749999682108562, "rew_std": 2.6196374340932294, "Agent": "fqf"}, {"env_step": 1200000, "rew": 19.65999984741211, "rew_std": 1.051855243717263, "Agent": "fqf"}, {"env_step": 1300000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 1400000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 1500000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 1600000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 1700000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 1800000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 1900000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 2000000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 2100000, "rew": 
20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 2200000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 2300000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 2400000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 2500000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 2600000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 2700000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 2800000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 2900000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 3000000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 3100000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 3200000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 3300000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 3400000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 3500000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 3600000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 3700000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 3800000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 3900000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 4000000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 4100000, "rew": 20.399999618530273, "rew_std": 
0.39999961853027344, "Agent": "fqf"}, {"env_step": 4200000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 4300000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 4400000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 4500000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 4600000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 4700000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 4800000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 4900000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 5000000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 5100000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 5200000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 5300000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 5400000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 5500000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 5600000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 5700000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 5800000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 5900000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 6000000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 6100000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, 
{"env_step": 6200000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 6300000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 6400000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 6500000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 6600000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 6700000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 6800000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 6900000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 7000000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 7100000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 7200000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 7300000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 7400000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 7500000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 7600000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 7700000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 7800000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 7900000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 8000000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 8100000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 8200000, "rew": 
20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 8300000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 8400000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 8500000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 8600000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 8700000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 8800000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 8900000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 9000000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 9100000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 9200000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 9300000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 9400000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 9500000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 9600000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 9700000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 9800000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 9900000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 10000000, "rew": 20.399999618530273, "rew_std": 0.39999961853027344, "Agent": "fqf"}, {"env_step": 0, "rew": -21.0, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 100000, "rew": -20.870000076293945, "rew_std": 0.27221299202092436, "Agent": "qrdqn"}, 
{"env_step": 200000, "rew": -19.48000011444092, "rew_std": 1.2023307411527404, "Agent": "qrdqn"}, {"env_step": 300000, "rew": -16.780000019073487, "rew_std": 2.1613883342785347, "Agent": "qrdqn"}, {"env_step": 400000, "rew": -12.920000219345093, "rew_std": 3.473845164617662, "Agent": "qrdqn"}, {"env_step": 500000, "rew": -7.060000002384186, "rew_std": 6.094456461922503, "Agent": "qrdqn"}, {"env_step": 600000, "rew": -3.779999941587448, "rew_std": 6.045295632144355, "Agent": "qrdqn"}, {"env_step": 700000, "rew": 9.749999952316283, "rew_std": 6.640368991575429, "Agent": "qrdqn"}, {"env_step": 800000, "rew": 15.269999933242797, "rew_std": 4.12966090763813, "Agent": "qrdqn"}, {"env_step": 900000, "rew": 19.622222052680122, "rew_std": 1.2916639583656102, "Agent": "qrdqn"}, {"env_step": 1000000, "rew": 20.09999990463257, "rew_std": 0.3162274644388989, "Agent": "qrdqn"}, {"env_step": 1100000, "rew": 19.899999618530273, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 1200000, "rew": 19.899999618530273, "rew_std": 0.10000038146972656, "Agent": "qrdqn"}, {"env_step": 1300000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 1400000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 1500000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 1600000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 1700000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 1800000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 1900000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 2000000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 2100000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 2200000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 2300000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 
2400000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 2500000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 2600000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 2700000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 2800000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 2900000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 3000000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 3100000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 3200000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 3300000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 3400000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 3500000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 3600000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 3700000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 3800000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 3900000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 4000000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 4100000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 4200000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 4300000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 4400000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 4500000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 4600000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 4700000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": 
"qrdqn"}, {"env_step": 4800000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 4900000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 5000000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 5100000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 5200000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 5300000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 5400000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 5500000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 5600000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 5700000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 5800000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 5900000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 6000000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 6100000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 6200000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 6300000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 6400000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 6500000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 6600000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 6700000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 6800000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 6900000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 7000000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 7100000, "rew": 20.700000762939453, 
"rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 7200000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 7300000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 7400000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 7500000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 7600000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 7700000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 7800000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 7900000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 8000000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 8100000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 8200000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 8300000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 8400000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 8500000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 8600000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 8700000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 8800000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 8900000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 9000000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 9100000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 9200000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 9300000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 9400000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 9500000, "rew": 
20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 9600000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 9700000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 9800000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 9900000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 10000000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "qrdqn"}, {"env_step": 0, "rew": -20.969999885559083, "rew_std": 0.06403148663413369, "Agent": "iqn"}, {"env_step": 100000, "rew": -19.1300000667572, "rew_std": 5.345100609195572, "Agent": "iqn"}, {"env_step": 200000, "rew": -19.34000015258789, "rew_std": 1.09380072496787, "Agent": "iqn"}, {"env_step": 300000, "rew": -18.3, "rew_std": 1.1471704685545094, "Agent": "iqn"}, {"env_step": 400000, "rew": -14.660000038146972, "rew_std": 2.7383207958984883, "Agent": "iqn"}, {"env_step": 500000, "rew": -9.659999978542327, "rew_std": 5.29871682181189, "Agent": "iqn"}, {"env_step": 600000, "rew": -8.680000057816505, "rew_std": 4.040495106986447, "Agent": "iqn"}, {"env_step": 700000, "rew": 2.8499999545514583, "rew_std": 6.374519581488704, "Agent": "iqn"}, {"env_step": 800000, "rew": 7.970000147819519, "rew_std": 8.160275826601659, "Agent": "iqn"}, {"env_step": 900000, "rew": 17.166666507720947, "rew_std": 4.651164654639624, "Agent": "iqn"}, {"env_step": 1000000, "rew": 17.849999984105427, "rew_std": 4.5853935091484725, "Agent": "iqn"}, {"env_step": 1100000, "rew": 18.260000038146973, "rew_std": 1.9652988864635694, "Agent": "iqn"}, {"env_step": 1200000, "rew": 18.68000030517578, "rew_std": 2.9047550585330666, "Agent": "iqn"}, {"env_step": 1300000, "rew": 19.600000381469727, "rew_std": 0.39999961853027344, "Agent": "iqn"}, {"env_step": 1400000, "rew": 18.799999237060547, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 1500000, "rew": 17.0, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 1600000, "rew": 20.700000762939453, 
"rew_std": 0.0, "Agent": "iqn"}, {"env_step": 1700000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 1800000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 1900000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 2000000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 2100000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 2200000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 2300000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 2400000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 2500000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 2600000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 2700000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 2800000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 2900000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 3000000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 3100000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 3200000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 3300000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 3400000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 3500000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 3600000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 3700000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 3800000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 3900000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 4000000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": 
"iqn"}, {"env_step": 4100000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 4200000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 4300000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 4400000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 4500000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 4600000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 4700000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 4800000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 4900000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 5000000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 5100000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 5200000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 5300000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 5400000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 5500000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 5600000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 5700000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 5800000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 5900000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 6000000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 6100000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 6200000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 6300000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 6400000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 6500000, 
"rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 6600000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 6700000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 6800000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 6900000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 7000000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 7100000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 7200000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 7300000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 7400000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 7500000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 7600000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 7700000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 7800000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 7900000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 8000000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 8100000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 8200000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 8300000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 8400000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 8500000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 8600000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 8700000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 8800000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 8900000, "rew": 20.700000762939453, 
"rew_std": 0.0, "Agent": "iqn"}, {"env_step": 9000000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 9100000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 9200000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 9300000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 9400000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 9500000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 9600000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 9700000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 9800000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 9900000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 10000000, "rew": 20.700000762939453, "rew_std": 0.0, "Agent": "iqn"}, {"env_step": 0, "rew": -21.0, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 100000, "rew": -20.979999923706053, "rew_std": 0.04000015258789063, "Agent": "rainbow"}, {"env_step": 200000, "rew": -20.439999961853026, "rew_std": 0.611882543624649, "Agent": "rainbow"}, {"env_step": 300000, "rew": -20.05, "rew_std": 1.0984076053899456, "Agent": "rainbow"}, {"env_step": 400000, "rew": -18.579999923706055, "rew_std": 1.1417527594487265, "Agent": "rainbow"}, {"env_step": 500000, "rew": -16.669999980926512, "rew_std": 2.142918529337897, "Agent": "rainbow"}, {"env_step": 600000, "rew": -14.539999961853027, "rew_std": 3.4153184124021854, "Agent": "rainbow"}, {"env_step": 700000, "rew": -11.319999885559081, "rew_std": 2.876734170162213, "Agent": "rainbow"}, {"env_step": 800000, "rew": -10.470000064373016, "rew_std": 4.46520999148195, "Agent": "rainbow"}, {"env_step": 900000, "rew": -2.170000058412552, "rew_std": 4.194055360234164, "Agent": "rainbow"}, {"env_step": 1000000, "rew": -1.1700000524520875, "rew_std": 7.9131599288409395, "Agent": "rainbow"}, 
{"env_step": 1100000, "rew": 4.420000007003546, "rew_std": 8.925671038750298, "Agent": "rainbow"}, {"env_step": 1200000, "rew": 4.199999978972806, "rew_std": 7.79358685682886, "Agent": "rainbow"}, {"env_step": 1300000, "rew": 4.3666667805777655, "rew_std": 9.006787650672438, "Agent": "rainbow"}, {"env_step": 1400000, "rew": 8.224999904632568, "rew_std": 5.813507857159169, "Agent": "rainbow"}, {"env_step": 1500000, "rew": 10.48749989271164, "rew_std": 5.611915177475383, "Agent": "rainbow"}, {"env_step": 1600000, "rew": 10.325000084936619, "rew_std": 7.195441264466608, "Agent": "rainbow"}, {"env_step": 1700000, "rew": 5.216666638851166, "rew_std": 8.010496691447514, "Agent": "rainbow"}, {"env_step": 1800000, "rew": 7.8833333651224775, "rew_std": 8.73506665798113, "Agent": "rainbow"}, {"env_step": 1900000, "rew": 10.416666527589163, "rew_std": 7.064799091845216, "Agent": "rainbow"}, {"env_step": 2000000, "rew": 14.739999961853027, "rew_std": 3.559550394534507, "Agent": "rainbow"}, {"env_step": 2100000, "rew": 16.82000026702881, "rew_std": 2.790985522657408, "Agent": "rainbow"}, {"env_step": 2200000, "rew": 14.699999809265137, "rew_std": 3.199374665910315, "Agent": "rainbow"}, {"env_step": 2300000, "rew": 16.800000190734863, "rew_std": 1.987460835388907, "Agent": "rainbow"}, {"env_step": 2400000, "rew": 16.649999856948853, "rew_std": 3.9150350231246187, "Agent": "rainbow"}, {"env_step": 2500000, "rew": 18.700000127156574, "rew_std": 1.8384774512584698, "Agent": "rainbow"}, {"env_step": 2600000, "rew": 9.5, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 2700000, "rew": 16.100000381469727, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 2800000, "rew": 15.5, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 2900000, "rew": 15.600000381469727, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 3000000, "rew": -4.300000190734863, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 3100000, "rew": 17.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 
3200000, "rew": 17.799999237060547, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 3300000, "rew": 19.100000381469727, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 3400000, "rew": 19.5, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 3500000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 3600000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 3700000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 3800000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 3900000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 4000000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 4100000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 4200000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 4300000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 4400000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 4500000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 4600000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 4700000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 4800000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 4900000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 5000000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 5100000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 5200000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 5300000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 5400000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 5500000, "rew": 20.200000762939453, 
"rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 5600000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 5700000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 5800000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 5900000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 6000000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 6100000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 6200000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 6300000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 6400000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 6500000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 6600000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 6700000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 6800000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 6900000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 7000000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 7100000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 7200000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 7300000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 7400000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 7500000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 7600000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 7700000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 7800000, "rew": 20.200000762939453, "rew_std": 0.0, 
"Agent": "rainbow"}, {"env_step": 7900000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 8000000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 8100000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 8200000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 8300000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 8400000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 8500000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 8600000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 8700000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 8800000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 8900000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 9000000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 9100000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 9200000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 9300000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 9400000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 9500000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 9600000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 9700000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 9800000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 9900000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 10000000, "rew": 20.200000762939453, "rew_std": 0.0, "Agent": "rainbow"}, {"env_step": 0, "rew": -20.75, "rew_std": 0.34132100802270626, "Agent": "ppo"}, 
{"env_step": 100000, "rew": -20.61000003814697, "rew_std": 0.32695557164785227, "Agent": "ppo"}, {"env_step": 200000, "rew": -19.98000011444092, "rew_std": 0.5793098790055098, "Agent": "ppo"}, {"env_step": 300000, "rew": -17.55, "rew_std": 2.648112510163754, "Agent": "ppo"}, {"env_step": 400000, "rew": -13.260000038146973, "rew_std": 4.553284472142394, "Agent": "ppo"}, {"env_step": 500000, "rew": -9.600000047683716, "rew_std": 5.175519361004068, "Agent": "ppo"}, {"env_step": 600000, "rew": -8.650000131130218, "rew_std": 5.414471552623239, "Agent": "ppo"}, {"env_step": 700000, "rew": -4.4400000154972075, "rew_std": 5.544041881025165, "Agent": "ppo"}, {"env_step": 800000, "rew": 0.6799999058246613, "rew_std": 7.877410761852243, "Agent": "ppo"}, {"env_step": 900000, "rew": 5.640000033378601, "rew_std": 6.771735266186935, "Agent": "ppo"}, {"env_step": 1000000, "rew": 5.6600001603364944, "rew_std": 7.132489235026172, "Agent": "ppo"}, {"env_step": 1100000, "rew": 6.7699999690055845, "rew_std": 6.8543488111854005, "Agent": "ppo"}, {"env_step": 1200000, "rew": 11.959999895095825, "rew_std": 4.759663803505452, "Agent": "ppo"}, {"env_step": 1300000, "rew": 13.499999952316283, "rew_std": 4.599999950243093, "Agent": "ppo"}, {"env_step": 1400000, "rew": 13.460000038146973, "rew_std": 4.538986609617991, "Agent": "ppo"}, {"env_step": 1500000, "rew": 13.359999942779542, "rew_std": 4.740295478528203, "Agent": "ppo"}, {"env_step": 1600000, "rew": 15.47999997138977, "rew_std": 3.309924420995019, "Agent": "ppo"}, {"env_step": 1700000, "rew": 13.88000020980835, "rew_std": 3.9776378782724717, "Agent": "ppo"}, {"env_step": 1800000, "rew": 16.680000019073486, "rew_std": 2.098475869175282, "Agent": "ppo"}, {"env_step": 1900000, "rew": 15.039999866485596, "rew_std": 3.2720634721834996, "Agent": "ppo"}, {"env_step": 2000000, "rew": 16.200000190734862, "rew_std": 2.0079841339730997, "Agent": "ppo"}, {"env_step": 2100000, "rew": 16.709999752044677, "rew_std": 2.592083999305904, "Agent": 
"ppo"}, {"env_step": 2200000, "rew": 17.93000011444092, "rew_std": 1.5020321173047337, "Agent": "ppo"}, {"env_step": 2300000, "rew": 16.13333299424913, "rew_std": 4.134945780702597, "Agent": "ppo"}, {"env_step": 2400000, "rew": 16.46666653951009, "rew_std": 2.9988884554980983, "Agent": "ppo"}, {"env_step": 2500000, "rew": 17.266666624281143, "rew_std": 1.7549928259554646, "Agent": "ppo"}, {"env_step": 2600000, "rew": 17.63333363003201, "rew_std": 1.300427226972741, "Agent": "ppo"}, {"env_step": 2700000, "rew": 16.8111113442315, "rew_std": 2.6534861585374485, "Agent": "ppo"}, {"env_step": 2800000, "rew": 17.000000211927627, "rew_std": 2.82999802642146, "Agent": "ppo"}, {"env_step": 2900000, "rew": 16.97777779897054, "rew_std": 2.5494127247858547, "Agent": "ppo"}, {"env_step": 3000000, "rew": 17.81250011920929, "rew_std": 1.6593954848338575, "Agent": "ppo"}, {"env_step": 3100000, "rew": 17.06250011920929, "rew_std": 2.284697851779331, "Agent": "ppo"}, {"env_step": 3200000, "rew": 16.975000381469727, "rew_std": 2.1057958062253594, "Agent": "ppo"}, {"env_step": 3300000, "rew": 16.824999809265137, "rew_std": 2.9625790227338165, "Agent": "ppo"}, {"env_step": 3400000, "rew": 18.1875, "rew_std": 2.531519990815377, "Agent": "ppo"}, {"env_step": 3500000, "rew": 16.71666669845581, "rew_std": 2.412755415928967, "Agent": "ppo"}, {"env_step": 3600000, "rew": 16.46666669845581, "rew_std": 3.901566607031345, "Agent": "ppo"}, {"env_step": 3700000, "rew": 16.166666666666668, "rew_std": 2.739626889190835, "Agent": "ppo"}, {"env_step": 3800000, "rew": 17.300000190734863, "rew_std": 2.78926494928614, "Agent": "ppo"}, {"env_step": 3900000, "rew": 18.09999990463257, "rew_std": 2.077658824212887, "Agent": "ppo"}, {"env_step": 4000000, "rew": 17.019999504089355, "rew_std": 1.4483091764261082, "Agent": "ppo"}, {"env_step": 4100000, "rew": 18.620000076293945, "rew_std": 1.2253983415940415, "Agent": "ppo"}, {"env_step": 4200000, "rew": 18.35999984741211, "rew_std": 2.2526428387365733, 
"Agent": "ppo"}, {"env_step": 4300000, "rew": 19.0, "rew_std": 0.6519197285159748, "Agent": "ppo"}, {"env_step": 4400000, "rew": 18.975000381469727, "rew_std": 1.3141059206073868, "Agent": "ppo"}, {"env_step": 4500000, "rew": 19.625, "rew_std": 0.3897113582892007, "Agent": "ppo"}, {"env_step": 4600000, "rew": 19.566666920979817, "rew_std": 0.684754539003982, "Agent": "ppo"}, {"env_step": 4700000, "rew": 15.199999809265137, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 4800000, "rew": 17.200000762939453, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 4900000, "rew": 16.799999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 5000000, "rew": 18.700000762939453, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 5100000, "rew": 16.100000381469727, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 5200000, "rew": 17.700000762939453, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 5300000, "rew": 17.700000762939453, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 5400000, "rew": 17.600000381469727, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 5500000, "rew": 16.700000762939453, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 5600000, "rew": 19.399999618530273, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 5700000, "rew": 19.100000381469727, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 5800000, "rew": 18.5, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 5900000, "rew": 18.700000762939453, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 6000000, "rew": 19.600000381469727, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 6100000, "rew": 19.0, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 6200000, "rew": 19.0, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 6300000, "rew": 19.100000381469727, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 6400000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 6500000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 6600000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 6700000, 
"rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 6800000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 6900000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 7000000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 7100000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 7200000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 7300000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 7400000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 7500000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 7600000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 7700000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 7800000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 7900000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 8000000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 8100000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 8200000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 8300000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 8400000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 8500000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 8600000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 8700000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 8800000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 8900000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 9000000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 9100000, "rew": 20.299999237060547, 
"rew_std": 0.0, "Agent": "ppo"}, {"env_step": 9200000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 9300000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 9400000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 9500000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 9600000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 9700000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 9800000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 9900000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}, {"env_step": 10000000, "rew": 20.299999237060547, "rew_std": 0.0, "Agent": "ppo"}]
examples/atari/benchmark/QbertNoFrameskip-v4/result.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"env_step": 0, "rew": 95.0, "rew_std": 75.92759709091287, "Agent": "c51"}, {"env_step": 100000, "rew": 251.5, "rew_std": 82.12186067059123, "Agent": "c51"}, {"env_step": 200000, "rew": 255.5, "rew_std": 117.85478352616833, "Agent": "c51"}, {"env_step": 300000, "rew": 320.0, "rew_std": 42.175229697062704, "Agent": "c51"}, {"env_step": 400000, "rew": 332.5, "rew_std": 83.60621986431393, "Agent": "c51"}, {"env_step": 500000, "rew": 430.25, "rew_std": 135.99839153460601, "Agent": "c51"}, {"env_step": 600000, "rew": 523.75, "rew_std": 108.9681719586045, "Agent": "c51"}, {"env_step": 700000, "rew": 1121.0, "rew_std": 516.2099863427673, "Agent": "c51"}, {"env_step": 800000, "rew": 1754.75, "rew_std": 1004.4398003364861, "Agent": "c51"}, {"env_step": 900000, "rew": 2517.0, "rew_std": 1108.9954914245593, "Agent": "c51"}, {"env_step": 1000000, "rew": 2285.0, "rew_std": 1031.815753901829, "Agent": "c51"}, {"env_step": 1100000, "rew": 2877.75, "rew_std": 1017.4357780715203, "Agent": "c51"}, {"env_step": 1200000, "rew": 3304.5, "rew_std": 804.6146593245738, "Agent": "c51"}, {"env_step": 1300000, "rew": 3511.25, "rew_std": 1172.84443235239, "Agent": "c51"}, {"env_step": 1400000, "rew": 3495.0, "rew_std": 713.4371030441296, "Agent": "c51"}, {"env_step": 1500000, "rew": 3199.0, "rew_std": 1272.923897960911, "Agent": "c51"}, {"env_step": 1600000, "rew": 3992.0, "rew_std": 1011.4944389367645, "Agent": "c51"}, {"env_step": 1700000, "rew": 4453.75, "rew_std": 1278.8536126156114, "Agent": "c51"}, {"env_step": 1800000, "rew": 3931.5, "rew_std": 1001.7822867270114, "Agent": "c51"}, {"env_step": 1900000, "rew": 4928.25, "rew_std": 1331.802842953866, "Agent": "c51"}, {"env_step": 2000000, "rew": 4457.0, "rew_std": 1296.9019816470325, "Agent": "c51"}, {"env_step": 2100000, "rew": 5236.75, "rew_std": 1800.0354336790151, "Agent": "c51"}, {"env_step": 2200000, "rew": 4757.25, "rew_std": 1431.1256277839482, "Agent": "c51"}, {"env_step": 2300000, "rew": 4738.25, "rew_std": 
1369.4538373015719, "Agent": "c51"}, {"env_step": 2400000, "rew": 6592.0, "rew_std": 1420.6903251588644, "Agent": "c51"}, {"env_step": 2500000, "rew": 5894.25, "rew_std": 1735.2507203571477, "Agent": "c51"}, {"env_step": 2600000, "rew": 7282.25, "rew_std": 2613.5424833929906, "Agent": "c51"}, {"env_step": 2700000, "rew": 7078.25, "rew_std": 2062.308188050467, "Agent": "c51"}, {"env_step": 2800000, "rew": 6066.25, "rew_std": 1512.1017037554054, "Agent": "c51"}, {"env_step": 2900000, "rew": 7065.5, "rew_std": 2015.3544353289324, "Agent": "c51"}, {"env_step": 3000000, "rew": 6861.0, "rew_std": 1669.891538394036, "Agent": "c51"}, {"env_step": 3100000, "rew": 7762.75, "rew_std": 2067.515553145852, "Agent": "c51"}, {"env_step": 3200000, "rew": 7553.5, "rew_std": 2434.32644688423, "Agent": "c51"}, {"env_step": 3300000, "rew": 6468.25, "rew_std": 1466.2916873869265, "Agent": "c51"}, {"env_step": 3400000, "rew": 7396.25, "rew_std": 2111.3411762431956, "Agent": "c51"}, {"env_step": 3500000, "rew": 7398.75, "rew_std": 2466.653413534216, "Agent": "c51"}, {"env_step": 3600000, "rew": 7548.75, "rew_std": 2775.422546658436, "Agent": "c51"}, {"env_step": 3700000, "rew": 8335.5, "rew_std": 2109.992239322221, "Agent": "c51"}, {"env_step": 3800000, "rew": 6925.0, "rew_std": 1951.7191140120547, "Agent": "c51"}, {"env_step": 3900000, "rew": 7580.5, "rew_std": 2267.1120947143304, "Agent": "c51"}, {"env_step": 4000000, "rew": 8586.75, "rew_std": 2490.3042108344916, "Agent": "c51"}, {"env_step": 4100000, "rew": 8712.75, "rew_std": 2641.0264221510547, "Agent": "c51"}, {"env_step": 4200000, "rew": 9052.75, "rew_std": 1451.2083112013934, "Agent": "c51"}, {"env_step": 4300000, "rew": 7919.75, "rew_std": 1486.0133116833106, "Agent": "c51"}, {"env_step": 4400000, "rew": 9568.25, "rew_std": 2890.1516071825713, "Agent": "c51"}, {"env_step": 4500000, "rew": 8489.0, "rew_std": 1878.7070687044322, "Agent": "c51"}, {"env_step": 4600000, "rew": 8453.75, "rew_std": 2539.73577611924, "Agent": "c51"}, 
{"env_step": 4700000, "rew": 8407.0, "rew_std": 2617.2267765709566, "Agent": "c51"}, {"env_step": 4800000, "rew": 8893.25, "rew_std": 2978.127568204559, "Agent": "c51"}, {"env_step": 4900000, "rew": 10263.75, "rew_std": 2290.572322040935, "Agent": "c51"}, {"env_step": 5000000, "rew": 8514.5, "rew_std": 1787.0897431298743, "Agent": "c51"}, {"env_step": 5100000, "rew": 8638.75, "rew_std": 2969.4349736102995, "Agent": "c51"}, {"env_step": 5200000, "rew": 10585.75, "rew_std": 3481.069708078251, "Agent": "c51"}, {"env_step": 5300000, "rew": 9607.5, "rew_std": 2606.770032051159, "Agent": "c51"}, {"env_step": 5400000, "rew": 9306.5, "rew_std": 2684.2033734424817, "Agent": "c51"}, {"env_step": 5500000, "rew": 9660.75, "rew_std": 2237.9474552589477, "Agent": "c51"}, {"env_step": 5600000, "rew": 9766.25, "rew_std": 2911.0542012302003, "Agent": "c51"}, {"env_step": 5700000, "rew": 10415.5, "rew_std": 1625.4448621838885, "Agent": "c51"}, {"env_step": 5800000, "rew": 9485.5, "rew_std": 3670.640407340387, "Agent": "c51"}, {"env_step": 5900000, "rew": 10269.0, "rew_std": 2380.1436931412354, "Agent": "c51"}, {"env_step": 6000000, "rew": 10933.5, "rew_std": 2768.2542332668795, "Agent": "c51"}, {"env_step": 6100000, "rew": 10309.25, "rew_std": 2190.8112224698866, "Agent": "c51"}, {"env_step": 6200000, "rew": 10257.0, "rew_std": 3413.4135773445328, "Agent": "c51"}, {"env_step": 6300000, "rew": 9958.0, "rew_std": 2849.388925717232, "Agent": "c51"}, {"env_step": 6400000, "rew": 11790.0, "rew_std": 1323.6403401226482, "Agent": "c51"}, {"env_step": 6500000, "rew": 10310.75, "rew_std": 2311.68581613073, "Agent": "c51"}, {"env_step": 6600000, "rew": 9120.75, "rew_std": 2925.0596254606503, "Agent": "c51"}, {"env_step": 6700000, "rew": 10305.5, "rew_std": 2839.6374768621436, "Agent": "c51"}, {"env_step": 6800000, "rew": 10348.75, "rew_std": 3006.7963121069574, "Agent": "c51"}, {"env_step": 6900000, "rew": 10654.25, "rew_std": 1407.1265268269233, "Agent": "c51"}, {"env_step": 7000000, "rew": 
11493.75, "rew_std": 1194.8479244238574, "Agent": "c51"}, {"env_step": 7100000, "rew": 11250.5, "rew_std": 1915.5099843122719, "Agent": "c51"}, {"env_step": 7200000, "rew": 10615.75, "rew_std": 2852.864141963301, "Agent": "c51"}, {"env_step": 7300000, "rew": 10428.75, "rew_std": 1486.1473892249046, "Agent": "c51"}, {"env_step": 7400000, "rew": 11293.0, "rew_std": 2100.1969550496924, "Agent": "c51"}, {"env_step": 7500000, "rew": 10405.0, "rew_std": 2845.91066268778, "Agent": "c51"}, {"env_step": 7600000, "rew": 11912.75, "rew_std": 1889.1385106709354, "Agent": "c51"}, {"env_step": 7700000, "rew": 10792.75, "rew_std": 2319.9715650197095, "Agent": "c51"}, {"env_step": 7800000, "rew": 11481.75, "rew_std": 2059.718442530435, "Agent": "c51"}, {"env_step": 7900000, "rew": 11188.0, "rew_std": 1572.3460973971348, "Agent": "c51"}, {"env_step": 8000000, "rew": 11333.25, "rew_std": 2443.5376634093445, "Agent": "c51"}, {"env_step": 8100000, "rew": 11388.75, "rew_std": 1806.7637677626813, "Agent": "c51"}, {"env_step": 8200000, "rew": 11084.25, "rew_std": 2011.5637729139983, "Agent": "c51"}, {"env_step": 8300000, "rew": 11189.25, "rew_std": 1837.155767075835, "Agent": "c51"}, {"env_step": 8400000, "rew": 12201.5, "rew_std": 1443.038547648676, "Agent": "c51"}, {"env_step": 8500000, "rew": 12172.0, "rew_std": 2153.40886735427, "Agent": "c51"}, {"env_step": 8600000, "rew": 10667.0, "rew_std": 2920.304093754621, "Agent": "c51"}, {"env_step": 8700000, "rew": 12087.25, "rew_std": 1455.5503469478479, "Agent": "c51"}, {"env_step": 8800000, "rew": 11311.0, "rew_std": 2612.836868233453, "Agent": "c51"}, {"env_step": 8900000, "rew": 12494.75, "rew_std": 2119.100767424711, "Agent": "c51"}, {"env_step": 9000000, "rew": 12513.25, "rew_std": 1274.6416408151745, "Agent": "c51"}, {"env_step": 9100000, "rew": 12241.0, "rew_std": 1972.8945106112492, "Agent": "c51"}, {"env_step": 9200000, "rew": 10962.25, "rew_std": 1657.8398784261403, "Agent": "c51"}, {"env_step": 9300000, "rew": 11570.25, 
"rew_std": 2591.904813549294, "Agent": "c51"}, {"env_step": 9400000, "rew": 11239.25, "rew_std": 2040.6086867648094, "Agent": "c51"}, {"env_step": 9500000, "rew": 11834.25, "rew_std": 1834.925083620582, "Agent": "c51"}, {"env_step": 9600000, "rew": 11510.5, "rew_std": 1754.8346788230508, "Agent": "c51"}, {"env_step": 9700000, "rew": 10276.75, "rew_std": 2304.5601668214263, "Agent": "c51"}, {"env_step": 9800000, "rew": 12446.75, "rew_std": 1572.9002074194027, "Agent": "c51"}, {"env_step": 9900000, "rew": 10765.0, "rew_std": 2277.32930205537, "Agent": "c51"}, {"env_step": 10000000, "rew": 11854.5, "rew_std": 2126.8074078298673, "Agent": "c51"}, {"env_step": 0, "rew": 79.5, "rew_std": 76.44278906476399, "Agent": "dqn"}, {"env_step": 100000, "rew": 306.5, "rew_std": 140.31749712705113, "Agent": "dqn"}, {"env_step": 200000, "rew": 409.5, "rew_std": 96.2925230742242, "Agent": "dqn"}, {"env_step": 300000, "rew": 537.25, "rew_std": 147.0180686174322, "Agent": "dqn"}, {"env_step": 400000, "rew": 534.25, "rew_std": 124.05165254844451, "Agent": "dqn"}, {"env_step": 500000, "rew": 725.25, "rew_std": 251.90883767744236, "Agent": "dqn"}, {"env_step": 600000, "rew": 669.5, "rew_std": 160.39326669159152, "Agent": "dqn"}, {"env_step": 700000, "rew": 958.5, "rew_std": 439.7985334218385, "Agent": "dqn"}, {"env_step": 800000, "rew": 818.5, "rew_std": 111.63668751803773, "Agent": "dqn"}, {"env_step": 900000, "rew": 778.75, "rew_std": 199.5408792703891, "Agent": "dqn"}, {"env_step": 1000000, "rew": 850.0, "rew_std": 283.47618947629445, "Agent": "dqn"}, {"env_step": 1100000, "rew": 1346.0, "rew_std": 645.7797612189469, "Agent": "dqn"}, {"env_step": 1200000, "rew": 1157.5, "rew_std": 768.8619837135922, "Agent": "dqn"}, {"env_step": 1300000, "rew": 1414.5, "rew_std": 999.8636156996613, "Agent": "dqn"}, {"env_step": 1400000, "rew": 1861.25, "rew_std": 1166.1422779832656, "Agent": "dqn"}, {"env_step": 1500000, "rew": 2099.75, "rew_std": 986.7018609995625, "Agent": "dqn"}, {"env_step": 
1600000, "rew": 2019.0, "rew_std": 728.7679671884598, "Agent": "dqn"}, {"env_step": 1700000, "rew": 3189.0, "rew_std": 1119.5803901462368, "Agent": "dqn"}, {"env_step": 1800000, "rew": 3215.5, "rew_std": 1019.3391241387726, "Agent": "dqn"}, {"env_step": 1900000, "rew": 4062.5, "rew_std": 644.8352502771542, "Agent": "dqn"}, {"env_step": 2000000, "rew": 3697.75, "rew_std": 775.0285881823974, "Agent": "dqn"}, {"env_step": 2100000, "rew": 4084.75, "rew_std": 369.5460898183067, "Agent": "dqn"}, {"env_step": 2200000, "rew": 4364.5, "rew_std": 82.35441700358275, "Agent": "dqn"}, {"env_step": 2300000, "rew": 3960.5, "rew_std": 493.58357954859076, "Agent": "dqn"}, {"env_step": 2400000, "rew": 4298.5, "rew_std": 337.0908631215032, "Agent": "dqn"}, {"env_step": 2500000, "rew": 3868.5, "rew_std": 810.0564795123856, "Agent": "dqn"}, {"env_step": 2600000, "rew": 3593.0, "rew_std": 1069.2274079913964, "Agent": "dqn"}, {"env_step": 2700000, "rew": 3861.5, "rew_std": 863.5603626846244, "Agent": "dqn"}, {"env_step": 2800000, "rew": 4479.75, "rew_std": 226.15108334916283, "Agent": "dqn"}, {"env_step": 2900000, "rew": 4399.25, "rew_std": 278.67106505699513, "Agent": "dqn"}, {"env_step": 3000000, "rew": 4731.0, "rew_std": 975.6428649869787, "Agent": "dqn"}, {"env_step": 3100000, "rew": 4451.0, "rew_std": 1066.7041529871344, "Agent": "dqn"}, {"env_step": 3200000, "rew": 4260.0, "rew_std": 1112.3870729202133, "Agent": "dqn"}, {"env_step": 3300000, "rew": 4400.75, "rew_std": 758.1804287239285, "Agent": "dqn"}, {"env_step": 3400000, "rew": 4580.5, "rew_std": 901.3668786903588, "Agent": "dqn"}, {"env_step": 3500000, "rew": 4537.0, "rew_std": 1127.5176273566635, "Agent": "dqn"}, {"env_step": 3600000, "rew": 5060.75, "rew_std": 1816.7983686969778, "Agent": "dqn"}, {"env_step": 3700000, "rew": 5504.0, "rew_std": 1962.111808740776, "Agent": "dqn"}, {"env_step": 3800000, "rew": 5938.25, "rew_std": 1861.7293875587827, "Agent": "dqn"}, {"env_step": 3900000, "rew": 5781.75, "rew_std": 
1370.2176150159507, "Agent": "dqn"}, {"env_step": 4000000, "rew": 5990.25, "rew_std": 3394.9163189245182, "Agent": "dqn"}, {"env_step": 4100000, "rew": 6092.75, "rew_std": 2065.6846473990167, "Agent": "dqn"}, {"env_step": 4200000, "rew": 6176.0, "rew_std": 1842.3508080710362, "Agent": "dqn"}, {"env_step": 4300000, "rew": 6576.5, "rew_std": 2726.7487966440913, "Agent": "dqn"}, {"env_step": 4400000, "rew": 6971.25, "rew_std": 3082.8676281183402, "Agent": "dqn"}, {"env_step": 4500000, "rew": 6908.25, "rew_std": 2762.2427595162594, "Agent": "dqn"}, {"env_step": 4600000, "rew": 7546.0, "rew_std": 2864.2300885229174, "Agent": "dqn"}, {"env_step": 4700000, "rew": 7737.75, "rew_std": 3928.65680767613, "Agent": "dqn"}, {"env_step": 4800000, "rew": 8261.75, "rew_std": 3556.5873829416873, "Agent": "dqn"}, {"env_step": 4900000, "rew": 8120.5, "rew_std": 2792.5308413695275, "Agent": "dqn"}, {"env_step": 5000000, "rew": 7459.25, "rew_std": 3016.322481516192, "Agent": "dqn"}, {"env_step": 5100000, "rew": 8186.25, "rew_std": 3262.4464076058016, "Agent": "dqn"}, {"env_step": 5200000, "rew": 8457.75, "rew_std": 3065.806062441002, "Agent": "dqn"}, {"env_step": 5300000, "rew": 7461.25, "rew_std": 2633.543062586978, "Agent": "dqn"}, {"env_step": 5400000, "rew": 8212.25, "rew_std": 2857.8948655435174, "Agent": "dqn"}, {"env_step": 5500000, "rew": 8331.0, "rew_std": 2962.497088606164, "Agent": "dqn"}, {"env_step": 5600000, "rew": 8116.0, "rew_std": 3106.8304186099376, "Agent": "dqn"}, {"env_step": 5700000, "rew": 8354.0, "rew_std": 2939.679446810485, "Agent": "dqn"}, {"env_step": 5800000, "rew": 8698.25, "rew_std": 2624.4728161099324, "Agent": "dqn"}, {"env_step": 5900000, "rew": 9697.25, "rew_std": 2572.896337301602, "Agent": "dqn"}, {"env_step": 6000000, "rew": 8455.0, "rew_std": 1774.5978417658464, "Agent": "dqn"}, {"env_step": 6100000, "rew": 9885.75, "rew_std": 3028.3760190075473, "Agent": "dqn"}, {"env_step": 6200000, "rew": 8983.5, "rew_std": 2107.2515274641514, "Agent": "dqn"}, 
{"env_step": 6300000, "rew": 9419.75, "rew_std": 2727.142838668338, "Agent": "dqn"}, {"env_step": 6400000, "rew": 8409.25, "rew_std": 3007.3811385489535, "Agent": "dqn"}, {"env_step": 6500000, "rew": 9823.75, "rew_std": 2742.98269453163, "Agent": "dqn"}, {"env_step": 6600000, "rew": 9702.25, "rew_std": 2529.285336315379, "Agent": "dqn"}, {"env_step": 6700000, "rew": 10412.5, "rew_std": 2968.082925054487, "Agent": "dqn"}, {"env_step": 6800000, "rew": 9085.25, "rew_std": 2521.6067422379724, "Agent": "dqn"}, {"env_step": 6900000, "rew": 9624.25, "rew_std": 2870.277654252285, "Agent": "dqn"}, {"env_step": 7000000, "rew": 10178.25, "rew_std": 2328.1741907554942, "Agent": "dqn"}, {"env_step": 7100000, "rew": 9411.75, "rew_std": 3466.6296762850225, "Agent": "dqn"}, {"env_step": 7200000, "rew": 10059.0, "rew_std": 2418.3835510522313, "Agent": "dqn"}, {"env_step": 7300000, "rew": 9972.25, "rew_std": 3165.8356815381308, "Agent": "dqn"}, {"env_step": 7400000, "rew": 9769.25, "rew_std": 3534.1402861940837, "Agent": "dqn"}, {"env_step": 7500000, "rew": 9630.75, "rew_std": 3561.6785105480812, "Agent": "dqn"}, {"env_step": 7600000, "rew": 10130.5, "rew_std": 2504.094846446516, "Agent": "dqn"}, {"env_step": 7700000, "rew": 9689.75, "rew_std": 2412.3324941848296, "Agent": "dqn"}, {"env_step": 7800000, "rew": 9682.5, "rew_std": 2696.419848614084, "Agent": "dqn"}, {"env_step": 7900000, "rew": 8600.25, "rew_std": 4069.30498519096, "Agent": "dqn"}, {"env_step": 8000000, "rew": 10808.25, "rew_std": 1838.3657994262187, "Agent": "dqn"}, {"env_step": 8100000, "rew": 10105.5, "rew_std": 3078.21819402069, "Agent": "dqn"}, {"env_step": 8200000, "rew": 9794.25, "rew_std": 3020.5171432223324, "Agent": "dqn"}, {"env_step": 8300000, "rew": 10248.5, "rew_std": 2272.298450908243, "Agent": "dqn"}, {"env_step": 8400000, "rew": 9916.5, "rew_std": 3159.7433044473723, "Agent": "dqn"}, {"env_step": 8500000, "rew": 10325.5, "rew_std": 2780.830316650047, "Agent": "dqn"}, {"env_step": 8600000, "rew": 
10778.0, "rew_std": 1940.7523669958514, "Agent": "dqn"}, {"env_step": 8700000, "rew": 10993.0, "rew_std": 2580.0946688057784, "Agent": "dqn"}, {"env_step": 8800000, "rew": 10329.75, "rew_std": 2510.3706026202585, "Agent": "dqn"}, {"env_step": 8900000, "rew": 9983.0, "rew_std": 3615.9431342320636, "Agent": "dqn"}, {"env_step": 9000000, "rew": 11148.0, "rew_std": 1932.5183698997535, "Agent": "dqn"}, {"env_step": 9100000, "rew": 10034.75, "rew_std": 2345.046494741629, "Agent": "dqn"}, {"env_step": 9200000, "rew": 10810.75, "rew_std": 2402.0418527785896, "Agent": "dqn"}, {"env_step": 9300000, "rew": 10502.5, "rew_std": 2058.038811587381, "Agent": "dqn"}, {"env_step": 9400000, "rew": 10956.0, "rew_std": 1991.7147762669233, "Agent": "dqn"}, {"env_step": 9500000, "rew": 11620.25, "rew_std": 786.060947064539, "Agent": "dqn"}, {"env_step": 9600000, "rew": 10733.5, "rew_std": 2011.6753589980665, "Agent": "dqn"}, {"env_step": 9700000, "rew": 11486.25, "rew_std": 2341.8905957580514, "Agent": "dqn"}, {"env_step": 9800000, "rew": 11012.5, "rew_std": 2049.413025722243, "Agent": "dqn"}, {"env_step": 9900000, "rew": 10990.5, "rew_std": 1687.970601047305, "Agent": "dqn"}, {"env_step": 10000000, "rew": 11396.5, "rew_std": 1123.2326117060527, "Agent": "dqn"}, {"env_step": 0, "rew": 62.25, "rew_std": 64.61859252568102, "Agent": "fqf"}, {"env_step": 100000, "rew": 282.5, "rew_std": 133.41195598596101, "Agent": "fqf"}, {"env_step": 200000, "rew": 334.25, "rew_std": 97.66684442532174, "Agent": "fqf"}, {"env_step": 300000, "rew": 478.0, "rew_std": 103.5, "Agent": "fqf"}, {"env_step": 400000, "rew": 497.75, "rew_std": 127.49730389306278, "Agent": "fqf"}, {"env_step": 500000, "rew": 761.75, "rew_std": 323.0790344482291, "Agent": "fqf"}, {"env_step": 600000, "rew": 723.25, "rew_std": 85.77623505377233, "Agent": "fqf"}, {"env_step": 700000, "rew": 1184.75, "rew_std": 753.0441969101149, "Agent": "fqf"}, {"env_step": 800000, "rew": 1227.25, "rew_std": 684.0965301037567, "Agent": "fqf"}, 
{"env_step": 900000, "rew": 1899.75, "rew_std": 957.4160864013096, "Agent": "fqf"}, {"env_step": 1000000, "rew": 1912.5, "rew_std": 1270.665180132044, "Agent": "fqf"}, {"env_step": 1100000, "rew": 2567.5, "rew_std": 1188.7546004117082, "Agent": "fqf"}, {"env_step": 1200000, "rew": 3371.0, "rew_std": 1017.2175283586103, "Agent": "fqf"}, {"env_step": 1300000, "rew": 3156.25, "rew_std": 890.8782534667686, "Agent": "fqf"}, {"env_step": 1400000, "rew": 3885.0, "rew_std": 888.4551198569346, "Agent": "fqf"}, {"env_step": 1500000, "rew": 3952.75, "rew_std": 590.0110698114062, "Agent": "fqf"}, {"env_step": 1600000, "rew": 3700.0, "rew_std": 1213.4516883667022, "Agent": "fqf"}, {"env_step": 1700000, "rew": 4309.75, "rew_std": 1129.6019486969735, "Agent": "fqf"}, {"env_step": 1800000, "rew": 4612.75, "rew_std": 1088.452714866383, "Agent": "fqf"}, {"env_step": 1900000, "rew": 5602.25, "rew_std": 1122.1271374046703, "Agent": "fqf"}, {"env_step": 2000000, "rew": 6148.5, "rew_std": 2185.0435350354005, "Agent": "fqf"}, {"env_step": 2100000, "rew": 6673.75, "rew_std": 1807.6529153850304, "Agent": "fqf"}, {"env_step": 2200000, "rew": 6371.75, "rew_std": 2170.01153510759, "Agent": "fqf"}, {"env_step": 2300000, "rew": 6601.0, "rew_std": 2183.5877014674725, "Agent": "fqf"}, {"env_step": 2400000, "rew": 7732.0, "rew_std": 1939.5839760113508, "Agent": "fqf"}, {"env_step": 2500000, "rew": 8078.25, "rew_std": 2086.995762453772, "Agent": "fqf"}, {"env_step": 2600000, "rew": 9642.5, "rew_std": 2714.1858816227013, "Agent": "fqf"}, {"env_step": 2700000, "rew": 10048.5, "rew_std": 2313.9531110201865, "Agent": "fqf"}, {"env_step": 2800000, "rew": 9025.75, "rew_std": 3670.8754150066165, "Agent": "fqf"}, {"env_step": 2900000, "rew": 9993.5, "rew_std": 3190.4126300527337, "Agent": "fqf"}, {"env_step": 3000000, "rew": 10725.75, "rew_std": 1486.178846067996, "Agent": "fqf"}, {"env_step": 3100000, "rew": 12443.0, "rew_std": 1860.8062096843937, "Agent": "fqf"}, {"env_step": 3200000, "rew": 11651.5, 
"rew_std": 1916.6462245286687, "Agent": "fqf"}, {"env_step": 3300000, "rew": 11780.25, "rew_std": 2378.499645259591, "Agent": "fqf"}, {"env_step": 3400000, "rew": 12591.25, "rew_std": 1730.6852869600527, "Agent": "fqf"}, {"env_step": 3500000, "rew": 13177.25, "rew_std": 1040.0303180676995, "Agent": "fqf"}, {"env_step": 3600000, "rew": 12289.75, "rew_std": 3415.4978498748906, "Agent": "fqf"}, {"env_step": 3700000, "rew": 12660.0, "rew_std": 1981.2193215290426, "Agent": "fqf"}, {"env_step": 3800000, "rew": 12749.0, "rew_std": 2114.099453668157, "Agent": "fqf"}, {"env_step": 3900000, "rew": 13807.25, "rew_std": 1109.9293051811903, "Agent": "fqf"}, {"env_step": 4000000, "rew": 14015.25, "rew_std": 1171.8481396921702, "Agent": "fqf"}, {"env_step": 4100000, "rew": 13752.25, "rew_std": 1630.1025466209173, "Agent": "fqf"}, {"env_step": 4200000, "rew": 14020.5, "rew_std": 1309.8782386160938, "Agent": "fqf"}, {"env_step": 4300000, "rew": 13418.75, "rew_std": 1649.8266007371806, "Agent": "fqf"}, {"env_step": 4400000, "rew": 14221.5, "rew_std": 1284.3087634988715, "Agent": "fqf"}, {"env_step": 4500000, "rew": 14305.75, "rew_std": 859.7587234218679, "Agent": "fqf"}, {"env_step": 4600000, "rew": 14158.0, "rew_std": 1344.8414404679831, "Agent": "fqf"}, {"env_step": 4700000, "rew": 12771.5, "rew_std": 1663.6489263062685, "Agent": "fqf"}, {"env_step": 4800000, "rew": 14314.0, "rew_std": 1097.285970018755, "Agent": "fqf"}, {"env_step": 4900000, "rew": 14935.25, "rew_std": 337.25074128902963, "Agent": "fqf"}, {"env_step": 5000000, "rew": 14672.0, "rew_std": 807.711117912834, "Agent": "fqf"}, {"env_step": 5100000, "rew": 14673.0, "rew_std": 571.9405563517943, "Agent": "fqf"}, {"env_step": 5200000, "rew": 14309.75, "rew_std": 1108.4434187183394, "Agent": "fqf"}, {"env_step": 5300000, "rew": 14757.25, "rew_std": 947.0417427442151, "Agent": "fqf"}, {"env_step": 5400000, "rew": 14685.0, "rew_std": 655.1602857316674, "Agent": "fqf"}, {"env_step": 5500000, "rew": 14524.25, "rew_std": 
979.248468214273, "Agent": "fqf"}, {"env_step": 5600000, "rew": 14862.5, "rew_std": 499.7686964986903, "Agent": "fqf"}, {"env_step": 5700000, "rew": 14338.0, "rew_std": 1270.7752555035056, "Agent": "fqf"}, {"env_step": 5800000, "rew": 14777.75, "rew_std": 538.4253081904676, "Agent": "fqf"}, {"env_step": 5900000, "rew": 14932.0, "rew_std": 720.0848908288522, "Agent": "fqf"}, {"env_step": 6000000, "rew": 15026.25, "rew_std": 556.9619039934419, "Agent": "fqf"}, {"env_step": 6100000, "rew": 15113.75, "rew_std": 255.20151351432068, "Agent": "fqf"}, {"env_step": 6200000, "rew": 14408.5, "rew_std": 1393.7912325739462, "Agent": "fqf"}, {"env_step": 6300000, "rew": 15156.5, "rew_std": 590.047879413188, "Agent": "fqf"}, {"env_step": 6400000, "rew": 14545.5, "rew_std": 1392.182100157878, "Agent": "fqf"}, {"env_step": 6500000, "rew": 14554.75, "rew_std": 1060.3109508535692, "Agent": "fqf"}, {"env_step": 6600000, "rew": 13926.25, "rew_std": 1543.7536437203962, "Agent": "fqf"}, {"env_step": 6700000, "rew": 14911.25, "rew_std": 508.5976430342555, "Agent": "fqf"}, {"env_step": 6800000, "rew": 14964.0, "rew_std": 1249.9880999433556, "Agent": "fqf"}, {"env_step": 6900000, "rew": 15271.75, "rew_std": 499.26827708157066, "Agent": "fqf"}, {"env_step": 7000000, "rew": 14915.25, "rew_std": 710.6022533738548, "Agent": "fqf"}, {"env_step": 7100000, "rew": 14988.5, "rew_std": 568.0396112948463, "Agent": "fqf"}, {"env_step": 7200000, "rew": 14881.25, "rew_std": 963.4282861220133, "Agent": "fqf"}, {"env_step": 7300000, "rew": 15227.75, "rew_std": 746.1756244343553, "Agent": "fqf"}, {"env_step": 7400000, "rew": 15052.0, "rew_std": 1012.3807337163228, "Agent": "fqf"}, {"env_step": 7500000, "rew": 15262.75, "rew_std": 626.2052878250071, "Agent": "fqf"}, {"env_step": 7600000, "rew": 14771.75, "rew_std": 516.1831675868557, "Agent": "fqf"}, {"env_step": 7700000, "rew": 14902.25, "rew_std": 1191.0822022429854, "Agent": "fqf"}, {"env_step": 7800000, "rew": 15195.0, "rew_std": 983.0596370515881, 
"Agent": "fqf"}, {"env_step": 7900000, "rew": 15172.75, "rew_std": 897.3812247311619, "Agent": "fqf"}, {"env_step": 8000000, "rew": 14729.5, "rew_std": 1125.9345007592583, "Agent": "fqf"}, {"env_step": 8100000, "rew": 14950.75, "rew_std": 407.5706227146407, "Agent": "fqf"}, {"env_step": 8200000, "rew": 14679.25, "rew_std": 1469.804004791115, "Agent": "fqf"}, {"env_step": 8300000, "rew": 14879.75, "rew_std": 1249.1259193932372, "Agent": "fqf"}, {"env_step": 8400000, "rew": 14759.25, "rew_std": 824.2845761628687, "Agent": "fqf"}, {"env_step": 8500000, "rew": 14181.25, "rew_std": 1934.2803086678, "Agent": "fqf"}, {"env_step": 8600000, "rew": 15150.75, "rew_std": 606.5559022052296, "Agent": "fqf"}, {"env_step": 8700000, "rew": 15301.25, "rew_std": 684.131977399098, "Agent": "fqf"}, {"env_step": 8800000, "rew": 15258.75, "rew_std": 178.02826320559328, "Agent": "fqf"}, {"env_step": 8900000, "rew": 14306.75, "rew_std": 2652.5966169962594, "Agent": "fqf"}, {"env_step": 9000000, "rew": 14469.5, "rew_std": 1781.5501676910476, "Agent": "fqf"}, {"env_step": 9100000, "rew": 14648.25, "rew_std": 983.8413553515628, "Agent": "fqf"}, {"env_step": 9200000, "rew": 15119.25, "rew_std": 669.5624037384417, "Agent": "fqf"}, {"env_step": 9300000, "rew": 14687.75, "rew_std": 914.5568940749395, "Agent": "fqf"}, {"env_step": 9400000, "rew": 14220.0, "rew_std": 3311.433790671346, "Agent": "fqf"}, {"env_step": 9500000, "rew": 15234.75, "rew_std": 382.4288332487497, "Agent": "fqf"}, {"env_step": 9600000, "rew": 14718.75, "rew_std": 632.6375838503432, "Agent": "fqf"}, {"env_step": 9700000, "rew": 14343.5, "rew_std": 1404.7336046382602, "Agent": "fqf"}, {"env_step": 9800000, "rew": 15267.5, "rew_std": 387.3209263646879, "Agent": "fqf"}, {"env_step": 9900000, "rew": 15137.75, "rew_std": 331.75, "Agent": "fqf"}, {"env_step": 10000000, "rew": 14602.75, "rew_std": 1270.1847552620052, "Agent": "fqf"}, {"env_step": 0, "rew": 63.5, "rew_std": 62.13091018164791, "Agent": "qrdqn"}, {"env_step": 100000, 
"rew": 270.75, "rew_std": 151.2119786921658, "Agent": "qrdqn"}, {"env_step": 200000, "rew": 330.0, "rew_std": 140.30324301312496, "Agent": "qrdqn"}, {"env_step": 300000, "rew": 482.5, "rew_std": 145.48625364617786, "Agent": "qrdqn"}, {"env_step": 400000, "rew": 655.25, "rew_std": 164.68701375639793, "Agent": "qrdqn"}, {"env_step": 500000, "rew": 624.5, "rew_std": 130.56033088193365, "Agent": "qrdqn"}, {"env_step": 600000, "rew": 676.5, "rew_std": 131.37351331223505, "Agent": "qrdqn"}, {"env_step": 700000, "rew": 628.25, "rew_std": 158.0587311729409, "Agent": "qrdqn"}, {"env_step": 800000, "rew": 1161.25, "rew_std": 710.1436914456116, "Agent": "qrdqn"}, {"env_step": 900000, "rew": 1550.25, "rew_std": 826.7983808039297, "Agent": "qrdqn"}, {"env_step": 1000000, "rew": 1962.5, "rew_std": 961.0228925473108, "Agent": "qrdqn"}, {"env_step": 1100000, "rew": 2176.0, "rew_std": 1403.9837071704214, "Agent": "qrdqn"}, {"env_step": 1200000, "rew": 2638.5, "rew_std": 1025.882668729714, "Agent": "qrdqn"}, {"env_step": 1300000, "rew": 3701.0, "rew_std": 630.7289433663243, "Agent": "qrdqn"}, {"env_step": 1400000, "rew": 3190.25, "rew_std": 947.5115104841735, "Agent": "qrdqn"}, {"env_step": 1500000, "rew": 3946.75, "rew_std": 637.7578400145309, "Agent": "qrdqn"}, {"env_step": 1600000, "rew": 4426.5, "rew_std": 815.5735711755256, "Agent": "qrdqn"}, {"env_step": 1700000, "rew": 4326.25, "rew_std": 986.4046139896143, "Agent": "qrdqn"}, {"env_step": 1800000, "rew": 4494.5, "rew_std": 949.5484453149297, "Agent": "qrdqn"}, {"env_step": 1900000, "rew": 4857.5, "rew_std": 1134.8067016016428, "Agent": "qrdqn"}, {"env_step": 2000000, "rew": 4661.0, "rew_std": 2612.279225121235, "Agent": "qrdqn"}, {"env_step": 2100000, "rew": 6238.5, "rew_std": 2523.3789846156683, "Agent": "qrdqn"}, {"env_step": 2200000, "rew": 6793.5, "rew_std": 2207.1540499022717, "Agent": "qrdqn"}, {"env_step": 2300000, "rew": 8352.75, "rew_std": 2463.5217296585797, "Agent": "qrdqn"}, {"env_step": 2400000, "rew": 10017.0, 
"rew_std": 1099.753836092423, "Agent": "qrdqn"}, {"env_step": 2500000, "rew": 9378.25, "rew_std": 2206.291869291096, "Agent": "qrdqn"}, {"env_step": 2600000, "rew": 9277.75, "rew_std": 2164.6920826066694, "Agent": "qrdqn"}, {"env_step": 2700000, "rew": 9680.25, "rew_std": 1852.4255889238843, "Agent": "qrdqn"}, {"env_step": 2800000, "rew": 9750.0, "rew_std": 3101.0985956592867, "Agent": "qrdqn"}, {"env_step": 2900000, "rew": 11197.0, "rew_std": 2089.198650200598, "Agent": "qrdqn"}, {"env_step": 3000000, "rew": 10168.5, "rew_std": 1820.62976741566, "Agent": "qrdqn"}, {"env_step": 3100000, "rew": 10809.0, "rew_std": 1863.6564195151423, "Agent": "qrdqn"}, {"env_step": 3200000, "rew": 11434.75, "rew_std": 1928.14951767232, "Agent": "qrdqn"}, {"env_step": 3300000, "rew": 12635.0, "rew_std": 2041.877812211103, "Agent": "qrdqn"}, {"env_step": 3400000, "rew": 11676.0, "rew_std": 3368.622715591641, "Agent": "qrdqn"}, {"env_step": 3500000, "rew": 11960.0, "rew_std": 1950.4877595104256, "Agent": "qrdqn"}, {"env_step": 3600000, "rew": 11736.0, "rew_std": 2031.8129835198908, "Agent": "qrdqn"}, {"env_step": 3700000, "rew": 12507.25, "rew_std": 1577.2018141315968, "Agent": "qrdqn"}, {"env_step": 3800000, "rew": 12923.5, "rew_std": 4095.112208474879, "Agent": "qrdqn"}, {"env_step": 3900000, "rew": 13316.75, "rew_std": 1166.7872824555468, "Agent": "qrdqn"}, {"env_step": 4000000, "rew": 13060.0, "rew_std": 2080.1246957814815, "Agent": "qrdqn"}, {"env_step": 4100000, "rew": 12532.75, "rew_std": 1183.963919424912, "Agent": "qrdqn"}, {"env_step": 4200000, "rew": 12320.25, "rew_std": 2122.9921249265153, "Agent": "qrdqn"}, {"env_step": 4300000, "rew": 12833.5, "rew_std": 1463.2879074194525, "Agent": "qrdqn"}, {"env_step": 4400000, "rew": 12643.5, "rew_std": 1230.7717091321201, "Agent": "qrdqn"}, {"env_step": 4500000, "rew": 12753.5, "rew_std": 2467.2244526998347, "Agent": "qrdqn"}, {"env_step": 4600000, "rew": 14206.0, "rew_std": 934.261874422798, "Agent": "qrdqn"}, {"env_step": 4700000, 
"rew": 13566.0, "rew_std": 1616.8879058240248, "Agent": "qrdqn"}, {"env_step": 4800000, "rew": 13339.0, "rew_std": 2508.4644705476694, "Agent": "qrdqn"}, {"env_step": 4900000, "rew": 13325.5, "rew_std": 1697.8286868821601, "Agent": "qrdqn"}, {"env_step": 5000000, "rew": 13318.25, "rew_std": 1575.3479972691748, "Agent": "qrdqn"}, {"env_step": 5100000, "rew": 12695.25, "rew_std": 1818.0165875205869, "Agent": "qrdqn"}, {"env_step": 5200000, "rew": 13957.5, "rew_std": 1218.8980679285696, "Agent": "qrdqn"}, {"env_step": 5300000, "rew": 13959.75, "rew_std": 1010.0305997839868, "Agent": "qrdqn"}, {"env_step": 5400000, "rew": 13414.0, "rew_std": 1498.4079884997943, "Agent": "qrdqn"}, {"env_step": 5500000, "rew": 12775.5, "rew_std": 1296.2314608124584, "Agent": "qrdqn"}, {"env_step": 5600000, "rew": 14213.75, "rew_std": 1282.7033220897185, "Agent": "qrdqn"}, {"env_step": 5700000, "rew": 12620.5, "rew_std": 2257.2158735929534, "Agent": "qrdqn"}, {"env_step": 5800000, "rew": 12587.5, "rew_std": 1430.5497195134462, "Agent": "qrdqn"}, {"env_step": 5900000, "rew": 13289.5, "rew_std": 1792.5658286378216, "Agent": "qrdqn"}, {"env_step": 6000000, "rew": 13572.75, "rew_std": 2379.9851496385436, "Agent": "qrdqn"}, {"env_step": 6100000, "rew": 12327.75, "rew_std": 2985.5888133666363, "Agent": "qrdqn"}, {"env_step": 6200000, "rew": 13057.75, "rew_std": 2234.581182794664, "Agent": "qrdqn"}, {"env_step": 6300000, "rew": 13167.75, "rew_std": 2580.7533904850343, "Agent": "qrdqn"}, {"env_step": 6400000, "rew": 14265.0, "rew_std": 1022.666856801373, "Agent": "qrdqn"}, {"env_step": 6500000, "rew": 13314.5, "rew_std": 1621.0269892879637, "Agent": "qrdqn"}, {"env_step": 6600000, "rew": 14761.5, "rew_std": 862.8928091020344, "Agent": "qrdqn"}, {"env_step": 6700000, "rew": 12912.5, "rew_std": 2490.064005201473, "Agent": "qrdqn"}, {"env_step": 6800000, "rew": 13582.25, "rew_std": 1415.470085342675, "Agent": "qrdqn"}, {"env_step": 6900000, "rew": 14093.0, "rew_std": 1151.0564712471755, "Agent": 
"qrdqn"}, {"env_step": 7000000, "rew": 13608.75, "rew_std": 1454.381419195116, "Agent": "qrdqn"}, {"env_step": 7100000, "rew": 14457.25, "rew_std": 1426.2934699773396, "Agent": "qrdqn"}, {"env_step": 7200000, "rew": 14363.5, "rew_std": 1147.3579432766394, "Agent": "qrdqn"}, {"env_step": 7300000, "rew": 14335.75, "rew_std": 1048.029132467223, "Agent": "qrdqn"}, {"env_step": 7400000, "rew": 14255.0, "rew_std": 996.4361494847525, "Agent": "qrdqn"}, {"env_step": 7500000, "rew": 13165.0, "rew_std": 2007.4355531373853, "Agent": "qrdqn"}, {"env_step": 7600000, "rew": 13882.25, "rew_std": 1050.5673764685444, "Agent": "qrdqn"}, {"env_step": 7700000, "rew": 14029.25, "rew_std": 1288.9787866757156, "Agent": "qrdqn"}, {"env_step": 7800000, "rew": 13062.75, "rew_std": 2194.7472662017376, "Agent": "qrdqn"}, {"env_step": 7900000, "rew": 13878.75, "rew_std": 1196.7911524154915, "Agent": "qrdqn"}, {"env_step": 8000000, "rew": 14246.25, "rew_std": 1554.0568884374857, "Agent": "qrdqn"}, {"env_step": 8100000, "rew": 14211.5, "rew_std": 1194.981276003938, "Agent": "qrdqn"}, {"env_step": 8200000, "rew": 14197.0, "rew_std": 1123.807034147767, "Agent": "qrdqn"}, {"env_step": 8300000, "rew": 13508.0, "rew_std": 1345.116073058381, "Agent": "qrdqn"}, {"env_step": 8400000, "rew": 11739.5, "rew_std": 2172.110954808709, "Agent": "qrdqn"}, {"env_step": 8500000, "rew": 13295.5, "rew_std": 1875.5738455203516, "Agent": "qrdqn"}, {"env_step": 8600000, "rew": 14682.0, "rew_std": 657.4094994750228, "Agent": "qrdqn"}, {"env_step": 8700000, "rew": 13262.75, "rew_std": 2101.4068055709727, "Agent": "qrdqn"}, {"env_step": 8800000, "rew": 13034.25, "rew_std": 2962.464567298654, "Agent": "qrdqn"}, {"env_step": 8900000, "rew": 13833.25, "rew_std": 1593.2596500570771, "Agent": "qrdqn"}, {"env_step": 9000000, "rew": 13900.75, "rew_std": 1380.3591244672525, "Agent": "qrdqn"}, {"env_step": 9100000, "rew": 13849.5, "rew_std": 1837.9743605393412, "Agent": "qrdqn"}, {"env_step": 9200000, "rew": 12643.25, "rew_std": 
2829.352631345199, "Agent": "qrdqn"}, {"env_step": 9300000, "rew": 13530.75, "rew_std": 1416.5393790855233, "Agent": "qrdqn"}, {"env_step": 9400000, "rew": 13982.5, "rew_std": 1845.6198145880423, "Agent": "qrdqn"}, {"env_step": 9500000, "rew": 13809.25, "rew_std": 1238.4738844642627, "Agent": "qrdqn"}, {"env_step": 9600000, "rew": 12931.5, "rew_std": 1797.4736437567033, "Agent": "qrdqn"}, {"env_step": 9700000, "rew": 14342.75, "rew_std": 649.1700951984773, "Agent": "qrdqn"}, {"env_step": 9800000, "rew": 14729.75, "rew_std": 626.4367984880837, "Agent": "qrdqn"}, {"env_step": 9900000, "rew": 13490.75, "rew_std": 1119.1950511416676, "Agent": "qrdqn"}, {"env_step": 10000000, "rew": 14191.5, "rew_std": 1683.4767595663445, "Agent": "qrdqn"}, {"env_step": 0, "rew": 74.75, "rew_std": 68.97871048374273, "Agent": "iqn"}, {"env_step": 100000, "rew": 305.25, "rew_std": 107.32223674523374, "Agent": "iqn"}, {"env_step": 200000, "rew": 278.5, "rew_std": 60.28266749240614, "Agent": "iqn"}, {"env_step": 300000, "rew": 480.75, "rew_std": 128.17980535170116, "Agent": "iqn"}, {"env_step": 400000, "rew": 580.5, "rew_std": 164.20566372692508, "Agent": "iqn"}, {"env_step": 500000, "rew": 603.5, "rew_std": 163.69254717304634, "Agent": "iqn"}, {"env_step": 600000, "rew": 681.5, "rew_std": 165.10299815569672, "Agent": "iqn"}, {"env_step": 700000, "rew": 779.5, "rew_std": 202.94642150084834, "Agent": "iqn"}, {"env_step": 800000, "rew": 1212.0, "rew_std": 518.2033867122059, "Agent": "iqn"}, {"env_step": 900000, "rew": 1937.0, "rew_std": 1077.2446333122296, "Agent": "iqn"}, {"env_step": 1000000, "rew": 2055.75, "rew_std": 1114.4051384034444, "Agent": "iqn"}, {"env_step": 1100000, "rew": 2164.0, "rew_std": 763.3292212407435, "Agent": "iqn"}, {"env_step": 1200000, "rew": 2717.0, "rew_std": 926.5607103692666, "Agent": "iqn"}, {"env_step": 1300000, "rew": 3349.25, "rew_std": 801.4120740917247, "Agent": "iqn"}, {"env_step": 1400000, "rew": 3172.25, "rew_std": 848.1453663730057, "Agent": "iqn"}, 
{"env_step": 1500000, "rew": 3463.5, "rew_std": 827.8875225536377, "Agent": "iqn"}, {"env_step": 1600000, "rew": 4035.75, "rew_std": 911.0859248720726, "Agent": "iqn"}, {"env_step": 1700000, "rew": 4497.0, "rew_std": 543.007596631944, "Agent": "iqn"}, {"env_step": 1800000, "rew": 4461.25, "rew_std": 499.12705045909905, "Agent": "iqn"}, {"env_step": 1900000, "rew": 4384.25, "rew_std": 471.4711682595236, "Agent": "iqn"}, {"env_step": 2000000, "rew": 5132.0, "rew_std": 1111.9947167140679, "Agent": "iqn"}, {"env_step": 2100000, "rew": 4575.75, "rew_std": 2275.0469912729277, "Agent": "iqn"}, {"env_step": 2200000, "rew": 5614.5, "rew_std": 1350.1304566596518, "Agent": "iqn"}, {"env_step": 2300000, "rew": 5378.75, "rew_std": 2386.0001178751018, "Agent": "iqn"}, {"env_step": 2400000, "rew": 6720.5, "rew_std": 2223.6897265580915, "Agent": "iqn"}, {"env_step": 2500000, "rew": 7193.75, "rew_std": 1491.2818521325873, "Agent": "iqn"}, {"env_step": 2600000, "rew": 8060.25, "rew_std": 2501.7125259509735, "Agent": "iqn"}, {"env_step": 2700000, "rew": 8047.0, "rew_std": 1672.755511125281, "Agent": "iqn"}, {"env_step": 2800000, "rew": 8176.0, "rew_std": 3218.092447397992, "Agent": "iqn"}, {"env_step": 2900000, "rew": 9079.25, "rew_std": 2817.5170917848927, "Agent": "iqn"}, {"env_step": 3000000, "rew": 9333.5, "rew_std": 1586.5446731813133, "Agent": "iqn"}, {"env_step": 3100000, "rew": 11244.75, "rew_std": 1804.940944324772, "Agent": "iqn"}, {"env_step": 3200000, "rew": 9774.75, "rew_std": 2385.623988079429, "Agent": "iqn"}, {"env_step": 3300000, "rew": 10427.5, "rew_std": 2821.736167681167, "Agent": "iqn"}, {"env_step": 3400000, "rew": 9773.25, "rew_std": 2530.4006723244443, "Agent": "iqn"}, {"env_step": 3500000, "rew": 10958.5, "rew_std": 1914.0373559572968, "Agent": "iqn"}, {"env_step": 3600000, "rew": 11481.25, "rew_std": 2320.765027420915, "Agent": "iqn"}, {"env_step": 3700000, "rew": 10402.0, "rew_std": 2840.605525235773, "Agent": "iqn"}, {"env_step": 3800000, "rew": 11571.25, 
"rew_std": 1838.5531845720427, "Agent": "iqn"}, {"env_step": 3900000, "rew": 12558.75, "rew_std": 1597.0246749815733, "Agent": "iqn"}, {"env_step": 4000000, "rew": 12249.5, "rew_std": 1836.1981102266716, "Agent": "iqn"}, {"env_step": 4100000, "rew": 12411.5, "rew_std": 1798.764228574718, "Agent": "iqn"}, {"env_step": 4200000, "rew": 12926.75, "rew_std": 1323.884459648953, "Agent": "iqn"}, {"env_step": 4300000, "rew": 11794.75, "rew_std": 2639.6958750015124, "Agent": "iqn"}, {"env_step": 4400000, "rew": 12201.0, "rew_std": 1702.2159087495334, "Agent": "iqn"}, {"env_step": 4500000, "rew": 12271.25, "rew_std": 1584.632548100663, "Agent": "iqn"}, {"env_step": 4600000, "rew": 12395.25, "rew_std": 1911.1424757196937, "Agent": "iqn"}, {"env_step": 4700000, "rew": 12780.0, "rew_std": 1188.934396844502, "Agent": "iqn"}, {"env_step": 4800000, "rew": 12680.5, "rew_std": 1798.4388368804762, "Agent": "iqn"}, {"env_step": 4900000, "rew": 11659.0, "rew_std": 1524.3105818697186, "Agent": "iqn"}, {"env_step": 5000000, "rew": 12834.25, "rew_std": 1934.9157119885094, "Agent": "iqn"}, {"env_step": 5100000, "rew": 13496.0, "rew_std": 1634.7783488901484, "Agent": "iqn"}, {"env_step": 5200000, "rew": 13142.75, "rew_std": 1530.6499640675527, "Agent": "iqn"}, {"env_step": 5300000, "rew": 12664.75, "rew_std": 2404.7719356521106, "Agent": "iqn"}, {"env_step": 5400000, "rew": 12944.25, "rew_std": 2205.103186361128, "Agent": "iqn"}, {"env_step": 5500000, "rew": 13810.25, "rew_std": 2059.245750875791, "Agent": "iqn"}, {"env_step": 5600000, "rew": 13504.0, "rew_std": 849.1313208214616, "Agent": "iqn"}, {"env_step": 5700000, "rew": 13502.25, "rew_std": 1435.742599667503, "Agent": "iqn"}, {"env_step": 5800000, "rew": 14175.25, "rew_std": 1070.7231493247916, "Agent": "iqn"}, {"env_step": 5900000, "rew": 13746.0, "rew_std": 1353.6211619208677, "Agent": "iqn"}, {"env_step": 6000000, "rew": 14359.75, "rew_std": 987.1046360442241, "Agent": "iqn"}, {"env_step": 6100000, "rew": 13638.25, "rew_std": 
2135.9354069119227, "Agent": "iqn"}, {"env_step": 6200000, "rew": 14398.0, "rew_std": 724.5531381479208, "Agent": "iqn"}, {"env_step": 6300000, "rew": 13681.25, "rew_std": 1508.860600751441, "Agent": "iqn"}, {"env_step": 6400000, "rew": 12862.0, "rew_std": 2345.081501781974, "Agent": "iqn"}, {"env_step": 6500000, "rew": 12578.5, "rew_std": 3452.268855405094, "Agent": "iqn"}, {"env_step": 6600000, "rew": 13525.25, "rew_std": 1754.864115109771, "Agent": "iqn"}, {"env_step": 6700000, "rew": 14026.75, "rew_std": 1140.708688710663, "Agent": "iqn"}, {"env_step": 6800000, "rew": 14103.75, "rew_std": 1377.3363106010092, "Agent": "iqn"}, {"env_step": 6900000, "rew": 13723.5, "rew_std": 1402.6114928945933, "Agent": "iqn"}, {"env_step": 7000000, "rew": 13494.25, "rew_std": 997.4141880382492, "Agent": "iqn"}, {"env_step": 7100000, "rew": 14152.25, "rew_std": 709.2394253705867, "Agent": "iqn"}, {"env_step": 7200000, "rew": 13685.25, "rew_std": 1417.7761856160514, "Agent": "iqn"}, {"env_step": 7300000, "rew": 13408.25, "rew_std": 2077.5096419752185, "Agent": "iqn"}, {"env_step": 7400000, "rew": 14233.0, "rew_std": 909.7477672410084, "Agent": "iqn"}, {"env_step": 7500000, "rew": 14091.5, "rew_std": 743.8003764451857, "Agent": "iqn"}, {"env_step": 7600000, "rew": 13211.75, "rew_std": 1589.0996074821742, "Agent": "iqn"}, {"env_step": 7700000, "rew": 13444.5, "rew_std": 1892.1039215645635, "Agent": "iqn"}, {"env_step": 7800000, "rew": 13603.25, "rew_std": 2529.7435764322045, "Agent": "iqn"}, {"env_step": 7900000, "rew": 13292.25, "rew_std": 3160.3117824828614, "Agent": "iqn"}, {"env_step": 8000000, "rew": 14121.75, "rew_std": 818.3054518332382, "Agent": "iqn"}, {"env_step": 8100000, "rew": 14027.0, "rew_std": 721.3241296393737, "Agent": "iqn"}, {"env_step": 8200000, "rew": 14095.25, "rew_std": 599.5054732861078, "Agent": "iqn"}, {"env_step": 8300000, "rew": 14409.25, "rew_std": 808.6462839214684, "Agent": "iqn"}, {"env_step": 8400000, "rew": 13536.75, "rew_std": 753.1708388539747, 
"Agent": "iqn"}, {"env_step": 8500000, "rew": 13976.5, "rew_std": 988.6829623291786, "Agent": "iqn"}, {"env_step": 8600000, "rew": 13914.5, "rew_std": 1239.5683724587361, "Agent": "iqn"}, {"env_step": 8700000, "rew": 14257.0, "rew_std": 1150.6013645046662, "Agent": "iqn"}, {"env_step": 8800000, "rew": 13446.5, "rew_std": 1551.8111193054392, "Agent": "iqn"}, {"env_step": 8900000, "rew": 14032.5, "rew_std": 1186.8413963120768, "Agent": "iqn"}, {"env_step": 9000000, "rew": 14378.5, "rew_std": 943.7049326987753, "Agent": "iqn"}, {"env_step": 9100000, "rew": 14320.75, "rew_std": 647.3224177332344, "Agent": "iqn"}, {"env_step": 9200000, "rew": 13960.25, "rew_std": 1017.343630490701, "Agent": "iqn"}, {"env_step": 9300000, "rew": 13514.25, "rew_std": 1402.1367845185434, "Agent": "iqn"}, {"env_step": 9400000, "rew": 13712.25, "rew_std": 1607.3042065831844, "Agent": "iqn"}, {"env_step": 9500000, "rew": 14267.75, "rew_std": 724.1317645981289, "Agent": "iqn"}, {"env_step": 9600000, "rew": 14351.75, "rew_std": 780.0296869350551, "Agent": "iqn"}, {"env_step": 9700000, "rew": 13220.25, "rew_std": 1425.2001833075942, "Agent": "iqn"}, {"env_step": 9800000, "rew": 14156.5, "rew_std": 853.8107225843443, "Agent": "iqn"}, {"env_step": 9900000, "rew": 14273.75, "rew_std": 895.3443820675931, "Agent": "iqn"}, {"env_step": 10000000, "rew": 13774.75, "rew_std": 1513.8219223211163, "Agent": "iqn"}, {"env_step": 0, "rew": 45.5, "rew_std": 47.75981574503821, "Agent": "rainbow"}, {"env_step": 100000, "rew": 284.5, "rew_std": 61.47967143698801, "Agent": "rainbow"}, {"env_step": 200000, "rew": 285.0, "rew_std": 74.47314952383846, "Agent": "rainbow"}, {"env_step": 300000, "rew": 377.75, "rew_std": 92.13746523537534, "Agent": "rainbow"}, {"env_step": 400000, "rew": 395.75, "rew_std": 96.40442157909564, "Agent": "rainbow"}, {"env_step": 500000, "rew": 446.5, "rew_std": 135.95587519485872, "Agent": "rainbow"}, {"env_step": 600000, "rew": 509.0, "rew_std": 112.18400064180275, "Agent": "rainbow"}, 
{"env_step": 700000, "rew": 842.0, "rew_std": 379.03957577012983, "Agent": "rainbow"}, {"env_step": 800000, "rew": 841.25, "rew_std": 334.61031439571616, "Agent": "rainbow"}, {"env_step": 900000, "rew": 1965.0, "rew_std": 1128.5698914998575, "Agent": "rainbow"}, {"env_step": 1000000, "rew": 2198.25, "rew_std": 836.4859906178943, "Agent": "rainbow"}, {"env_step": 1100000, "rew": 3015.75, "rew_std": 848.7866707836546, "Agent": "rainbow"}, {"env_step": 1200000, "rew": 2877.0, "rew_std": 996.3312702108672, "Agent": "rainbow"}, {"env_step": 1300000, "rew": 3242.0, "rew_std": 876.3666470148211, "Agent": "rainbow"}, {"env_step": 1400000, "rew": 3739.5, "rew_std": 779.8091112573641, "Agent": "rainbow"}, {"env_step": 1500000, "rew": 3878.5, "rew_std": 610.4621200369438, "Agent": "rainbow"}, {"env_step": 1600000, "rew": 3686.75, "rew_std": 1020.1911891895558, "Agent": "rainbow"}, {"env_step": 1700000, "rew": 3802.5, "rew_std": 775.4450335130144, "Agent": "rainbow"}, {"env_step": 1800000, "rew": 4826.75, "rew_std": 1208.2508276430024, "Agent": "rainbow"}, {"env_step": 1900000, "rew": 5678.25, "rew_std": 1521.4220527191, "Agent": "rainbow"}, {"env_step": 2000000, "rew": 5642.5, "rew_std": 2018.9904655545058, "Agent": "rainbow"}, {"env_step": 2100000, "rew": 7018.0, "rew_std": 2637.9750283124363, "Agent": "rainbow"}, {"env_step": 2200000, "rew": 6920.25, "rew_std": 1881.316178237991, "Agent": "rainbow"}, {"env_step": 2300000, "rew": 7435.0, "rew_std": 1537.6528379318916, "Agent": "rainbow"}, {"env_step": 2400000, "rew": 7692.5, "rew_std": 1343.7070923382075, "Agent": "rainbow"}, {"env_step": 2500000, "rew": 8006.25, "rew_std": 1876.4488568836616, "Agent": "rainbow"}, {"env_step": 2600000, "rew": 9979.75, "rew_std": 2021.7954427933603, "Agent": "rainbow"}, {"env_step": 2700000, "rew": 9089.75, "rew_std": 1605.1473647301048, "Agent": "rainbow"}, {"env_step": 2800000, "rew": 8764.75, "rew_std": 1827.6663569973596, "Agent": "rainbow"}, {"env_step": 2900000, "rew": 9663.0, 
"rew_std": 2015.3541252097607, "Agent": "rainbow"}, {"env_step": 3000000, "rew": 9934.5, "rew_std": 2286.617261371041, "Agent": "rainbow"}, {"env_step": 3100000, "rew": 10924.25, "rew_std": 2628.7715881186787, "Agent": "rainbow"}, {"env_step": 3200000, "rew": 9174.75, "rew_std": 1997.3590219337134, "Agent": "rainbow"}, {"env_step": 3300000, "rew": 10324.25, "rew_std": 1182.9740328933683, "Agent": "rainbow"}, {"env_step": 3400000, "rew": 10506.5, "rew_std": 1664.2221155843351, "Agent": "rainbow"}, {"env_step": 3500000, "rew": 10675.0, "rew_std": 2079.8194032175006, "Agent": "rainbow"}, {"env_step": 3600000, "rew": 10794.25, "rew_std": 2335.775848085599, "Agent": "rainbow"}, {"env_step": 3700000, "rew": 10830.25, "rew_std": 2143.70282746933, "Agent": "rainbow"}, {"env_step": 3800000, "rew": 11664.75, "rew_std": 1526.7708775386043, "Agent": "rainbow"}, {"env_step": 3900000, "rew": 10242.5, "rew_std": 2334.839662589275, "Agent": "rainbow"}, {"env_step": 4000000, "rew": 11877.25, "rew_std": 1986.8088263594966, "Agent": "rainbow"}, {"env_step": 4100000, "rew": 11280.25, "rew_std": 2765.102721871287, "Agent": "rainbow"}, {"env_step": 4200000, "rew": 12994.5, "rew_std": 1754.1610530393154, "Agent": "rainbow"}, {"env_step": 4300000, "rew": 10860.25, "rew_std": 1974.6045331913933, "Agent": "rainbow"}, {"env_step": 4400000, "rew": 10636.25, "rew_std": 2674.058537597859, "Agent": "rainbow"}, {"env_step": 4500000, "rew": 12535.5, "rew_std": 1929.7117012652434, "Agent": "rainbow"}, {"env_step": 4600000, "rew": 12290.5, "rew_std": 1829.934015203827, "Agent": "rainbow"}, {"env_step": 4700000, "rew": 12177.5, "rew_std": 946.7675269040442, "Agent": "rainbow"}, {"env_step": 4800000, "rew": 13175.75, "rew_std": 1413.9178945398492, "Agent": "rainbow"}, {"env_step": 4900000, "rew": 12883.5, "rew_std": 1610.216677966043, "Agent": "rainbow"}, {"env_step": 5000000, "rew": 12284.5, "rew_std": 1809.4221453270654, "Agent": "rainbow"}, {"env_step": 5100000, "rew": 12318.0, "rew_std": 
2168.0633062712905, "Agent": "rainbow"}, {"env_step": 5200000, "rew": 12730.25, "rew_std": 1575.0005753967203, "Agent": "rainbow"}, {"env_step": 5300000, "rew": 11980.25, "rew_std": 1916.1492798057254, "Agent": "rainbow"}, {"env_step": 5400000, "rew": 12032.75, "rew_std": 2195.3403637021756, "Agent": "rainbow"}, {"env_step": 5500000, "rew": 12618.0, "rew_std": 2118.3926099757805, "Agent": "rainbow"}, {"env_step": 5600000, "rew": 13014.25, "rew_std": 1486.7145195026515, "Agent": "rainbow"}, {"env_step": 5700000, "rew": 12690.0, "rew_std": 1458.9743829142444, "Agent": "rainbow"}, {"env_step": 5800000, "rew": 12033.5, "rew_std": 1977.8407418192194, "Agent": "rainbow"}, {"env_step": 5900000, "rew": 12640.25, "rew_std": 2624.1961745456456, "Agent": "rainbow"}, {"env_step": 6000000, "rew": 13131.25, "rew_std": 1906.204097807997, "Agent": "rainbow"}, {"env_step": 6100000, "rew": 13501.75, "rew_std": 1226.5200008560805, "Agent": "rainbow"}, {"env_step": 6200000, "rew": 13880.0, "rew_std": 1096.272662251504, "Agent": "rainbow"}, {"env_step": 6300000, "rew": 12978.75, "rew_std": 1734.9788363262533, "Agent": "rainbow"}, {"env_step": 6400000, "rew": 12417.0, "rew_std": 1276.8250663266288, "Agent": "rainbow"}, {"env_step": 6500000, "rew": 13424.5, "rew_std": 1740.133543725883, "Agent": "rainbow"}, {"env_step": 6600000, "rew": 13237.0, "rew_std": 1644.9296337533713, "Agent": "rainbow"}, {"env_step": 6700000, "rew": 13351.75, "rew_std": 1120.8969901378093, "Agent": "rainbow"}, {"env_step": 6800000, "rew": 12263.0, "rew_std": 2282.7893573433357, "Agent": "rainbow"}, {"env_step": 6900000, "rew": 12439.0, "rew_std": 2598.2990108915487, "Agent": "rainbow"}, {"env_step": 7000000, "rew": 14034.5, "rew_std": 744.4837137238128, "Agent": "rainbow"}, {"env_step": 7100000, "rew": 13683.25, "rew_std": 901.8238810876545, "Agent": "rainbow"}, {"env_step": 7200000, "rew": 14111.25, "rew_std": 1060.766379793402, "Agent": "rainbow"}, {"env_step": 7300000, "rew": 13421.75, "rew_std": 
1568.2095881928537, "Agent": "rainbow"}, {"env_step": 7400000, "rew": 14206.5, "rew_std": 607.6617068731582, "Agent": "rainbow"}, {"env_step": 7500000, "rew": 13354.25, "rew_std": 1601.6690708445362, "Agent": "rainbow"}, {"env_step": 7600000, "rew": 13701.75, "rew_std": 1030.2663793893305, "Agent": "rainbow"}, {"env_step": 7700000, "rew": 13039.0, "rew_std": 2426.61317477673, "Agent": "rainbow"}, {"env_step": 7800000, "rew": 13988.25, "rew_std": 1832.0964937742772, "Agent": "rainbow"}, {"env_step": 7900000, "rew": 13303.0, "rew_std": 1248.9764809635128, "Agent": "rainbow"}, {"env_step": 8000000, "rew": 13551.25, "rew_std": 1319.6809510256637, "Agent": "rainbow"}, {"env_step": 8100000, "rew": 13257.25, "rew_std": 1674.4097654098891, "Agent": "rainbow"}, {"env_step": 8200000, "rew": 13652.5, "rew_std": 1983.858613913804, "Agent": "rainbow"}, {"env_step": 8300000, "rew": 13802.5, "rew_std": 1365.1304882684292, "Agent": "rainbow"}, {"env_step": 8400000, "rew": 13834.5, "rew_std": 1055.2753195256678, "Agent": "rainbow"}, {"env_step": 8500000, "rew": 14132.75, "rew_std": 759.6795788357089, "Agent": "rainbow"}, {"env_step": 8600000, "rew": 13816.5, "rew_std": 838.3877384599563, "Agent": "rainbow"}, {"env_step": 8700000, "rew": 13764.0, "rew_std": 1449.42367857021, "Agent": "rainbow"}, {"env_step": 8800000, "rew": 13053.75, "rew_std": 1003.1601629351118, "Agent": "rainbow"}, {"env_step": 8900000, "rew": 13302.75, "rew_std": 1787.7211226866455, "Agent": "rainbow"}, {"env_step": 9000000, "rew": 13252.75, "rew_std": 1108.8256456720326, "Agent": "rainbow"}, {"env_step": 9100000, "rew": 13711.75, "rew_std": 1272.3845969281458, "Agent": "rainbow"}, {"env_step": 9200000, "rew": 13983.5, "rew_std": 1598.7660085203213, "Agent": "rainbow"}, {"env_step": 9300000, "rew": 13033.25, "rew_std": 1330.3514808124958, "Agent": "rainbow"}, {"env_step": 9400000, "rew": 14224.75, "rew_std": 1230.1089636694792, "Agent": "rainbow"}, {"env_step": 9500000, "rew": 13983.25, "rew_std": 
1469.7389436563217, "Agent": "rainbow"}, {"env_step": 9600000, "rew": 12979.5, "rew_std": 1610.91891478125, "Agent": "rainbow"}, {"env_step": 9700000, "rew": 13711.25, "rew_std": 1179.091413970944, "Agent": "rainbow"}, {"env_step": 9800000, "rew": 13414.0, "rew_std": 2159.0412223947924, "Agent": "rainbow"}, {"env_step": 9900000, "rew": 13838.75, "rew_std": 1349.4764030912138, "Agent": "rainbow"}, {"env_step": 10000000, "rew": 14035.0, "rew_std": 1246.866572653225, "Agent": "rainbow"}, {"env_step": 0, "rew": 120.5, "rew_std": 101.5, "Agent": "ppo"}, {"env_step": 100000, "rew": 273.0, "rew_std": 28.956864471140516, "Agent": "ppo"}, {"env_step": 200000, "rew": 355.0, "rew_std": 89.81230427953622, "Agent": "ppo"}, {"env_step": 300000, "rew": 391.5, "rew_std": 92.91931984253867, "Agent": "ppo"}, {"env_step": 400000, "rew": 474.0, "rew_std": 108.15035829806575, "Agent": "ppo"}, {"env_step": 500000, "rew": 542.75, "rew_std": 105.84452040611266, "Agent": "ppo"}, {"env_step": 600000, "rew": 621.75, "rew_std": 77.6695725493581, "Agent": "ppo"}, {"env_step": 700000, "rew": 641.75, "rew_std": 85.77623505377233, "Agent": "ppo"}, {"env_step": 800000, "rew": 672.25, "rew_std": 64.53148456373835, "Agent": "ppo"}, {"env_step": 900000, "rew": 744.75, "rew_std": 134.25465541276398, "Agent": "ppo"}, {"env_step": 1000000, "rew": 791.25, "rew_std": 143.7891251103504, "Agent": "ppo"}, {"env_step": 1100000, "rew": 995.0, "rew_std": 389.2460661329797, "Agent": "ppo"}, {"env_step": 1200000, "rew": 817.25, "rew_std": 168.63588734311568, "Agent": "ppo"}, {"env_step": 1300000, "rew": 1099.0, "rew_std": 655.2797112684018, "Agent": "ppo"}, {"env_step": 1400000, "rew": 1188.0, "rew_std": 663.5, "Agent": "ppo"}, {"env_step": 1500000, "rew": 1322.0, "rew_std": 450.0927682156202, "Agent": "ppo"}, {"env_step": 1600000, "rew": 1452.75, "rew_std": 704.6368302182337, "Agent": "ppo"}, {"env_step": 1700000, "rew": 1558.5, "rew_std": 423.40170051618827, "Agent": "ppo"}, {"env_step": 1800000, "rew": 
1552.75, "rew_std": 663.306537356598, "Agent": "ppo"}, {"env_step": 1900000, "rew": 1814.25, "rew_std": 756.1093257591788, "Agent": "ppo"}, {"env_step": 2000000, "rew": 1824.0, "rew_std": 703.5785315087435, "Agent": "ppo"}, {"env_step": 2100000, "rew": 1752.25, "rew_std": 750.6567541160207, "Agent": "ppo"}, {"env_step": 2200000, "rew": 2510.75, "rew_std": 872.3481601402045, "Agent": "ppo"}, {"env_step": 2300000, "rew": 2298.25, "rew_std": 906.4326022931876, "Agent": "ppo"}, {"env_step": 2400000, "rew": 2231.0, "rew_std": 897.2521942018309, "Agent": "ppo"}, {"env_step": 2500000, "rew": 2028.0, "rew_std": 938.4780231843471, "Agent": "ppo"}, {"env_step": 2600000, "rew": 2503.25, "rew_std": 949.5203065232465, "Agent": "ppo"}, {"env_step": 2700000, "rew": 2804.5, "rew_std": 959.1681291619317, "Agent": "ppo"}, {"env_step": 2800000, "rew": 2946.25, "rew_std": 708.6265324555665, "Agent": "ppo"}, {"env_step": 2900000, "rew": 3231.75, "rew_std": 616.26298160769, "Agent": "ppo"}, {"env_step": 3000000, "rew": 2883.25, "rew_std": 727.0738012746712, "Agent": "ppo"}, {"env_step": 3100000, "rew": 3300.5, "rew_std": 795.6183130119617, "Agent": "ppo"}, {"env_step": 3200000, "rew": 3390.5, "rew_std": 828.5211222413102, "Agent": "ppo"}, {"env_step": 3300000, "rew": 3235.5, "rew_std": 996.1192197724126, "Agent": "ppo"}, {"env_step": 3400000, "rew": 3114.0, "rew_std": 1074.6076028020648, "Agent": "ppo"}, {"env_step": 3500000, "rew": 3412.75, "rew_std": 1089.8081539885816, "Agent": "ppo"}, {"env_step": 3600000, "rew": 3153.75, "rew_std": 1106.7566636347847, "Agent": "ppo"}, {"env_step": 3700000, "rew": 3294.75, "rew_std": 694.6846856668138, "Agent": "ppo"}, {"env_step": 3800000, "rew": 3217.0, "rew_std": 1153.753548206895, "Agent": "ppo"}, {"env_step": 3900000, "rew": 3735.5, "rew_std": 868.4992803681532, "Agent": "ppo"}, {"env_step": 4000000, "rew": 3744.0, "rew_std": 798.573885122723, "Agent": "ppo"}, {"env_step": 4100000, "rew": 3626.75, "rew_std": 879.9460565852886, "Agent": "ppo"}, 
{"env_step": 4200000, "rew": 3621.5, "rew_std": 977.4035758068414, "Agent": "ppo"}, {"env_step": 4300000, "rew": 3884.5, "rew_std": 623.5030072100695, "Agent": "ppo"}, {"env_step": 4400000, "rew": 3692.25, "rew_std": 711.3521016346265, "Agent": "ppo"}, {"env_step": 4500000, "rew": 3992.75, "rew_std": 715.286175247362, "Agent": "ppo"}, {"env_step": 4600000, "rew": 4163.0, "rew_std": 830.2919667201412, "Agent": "ppo"}, {"env_step": 4700000, "rew": 4100.75, "rew_std": 683.023654422012, "Agent": "ppo"}, {"env_step": 4800000, "rew": 4077.5, "rew_std": 490.6844709179209, "Agent": "ppo"}, {"env_step": 4900000, "rew": 4007.25, "rew_std": 496.95126773155533, "Agent": "ppo"}, {"env_step": 5000000, "rew": 4787.5, "rew_std": 1021.3936557468918, "Agent": "ppo"}, {"env_step": 5100000, "rew": 4553.0, "rew_std": 615.2263810988602, "Agent": "ppo"}, {"env_step": 5200000, "rew": 4548.75, "rew_std": 416.08029573629176, "Agent": "ppo"}, {"env_step": 5300000, "rew": 4595.0, "rew_std": 509.9178855462907, "Agent": "ppo"}, {"env_step": 5400000, "rew": 5037.5, "rew_std": 584.2281232532374, "Agent": "ppo"}, {"env_step": 5500000, "rew": 5001.75, "rew_std": 1064.1552107188124, "Agent": "ppo"}, {"env_step": 5600000, "rew": 5132.75, "rew_std": 1378.285370487549, "Agent": "ppo"}, {"env_step": 5700000, "rew": 5175.5, "rew_std": 1010.7384676561984, "Agent": "ppo"}, {"env_step": 5800000, "rew": 4833.5, "rew_std": 789.5474969879899, "Agent": "ppo"}, {"env_step": 5900000, "rew": 5724.0, "rew_std": 707.8031152799484, "Agent": "ppo"}, {"env_step": 6000000, "rew": 6142.5, "rew_std": 1675.208569104158, "Agent": "ppo"}, {"env_step": 6100000, "rew": 6317.0, "rew_std": 1503.244324785562, "Agent": "ppo"}, {"env_step": 6200000, "rew": 6381.75, "rew_std": 1400.6998473977214, "Agent": "ppo"}, {"env_step": 6300000, "rew": 6283.0, "rew_std": 1507.1785726980065, "Agent": "ppo"}, {"env_step": 6400000, "rew": 6748.0, "rew_std": 1430.778983630945, "Agent": "ppo"}, {"env_step": 6500000, "rew": 7201.75, "rew_std": 
1294.4265380854952, "Agent": "ppo"}, {"env_step": 6600000, "rew": 6559.0, "rew_std": 1157.9767916499882, "Agent": "ppo"}, {"env_step": 6700000, "rew": 7433.5, "rew_std": 1716.4509896877335, "Agent": "ppo"}, {"env_step": 6800000, "rew": 7610.5, "rew_std": 1812.574412265604, "Agent": "ppo"}, {"env_step": 6900000, "rew": 8195.0, "rew_std": 1976.1841386874858, "Agent": "ppo"}, {"env_step": 7000000, "rew": 8271.5, "rew_std": 1789.7011622055788, "Agent": "ppo"}, {"env_step": 7100000, "rew": 7825.5, "rew_std": 1272.6767067877058, "Agent": "ppo"}, {"env_step": 7200000, "rew": 8352.75, "rew_std": 1310.4419340436264, "Agent": "ppo"}, {"env_step": 7300000, "rew": 8443.0, "rew_std": 1754.0131841009634, "Agent": "ppo"}, {"env_step": 7400000, "rew": 8361.25, "rew_std": 1613.0232678111001, "Agent": "ppo"}, {"env_step": 7500000, "rew": 8785.5, "rew_std": 1928.0082987373264, "Agent": "ppo"}, {"env_step": 7600000, "rew": 9088.0, "rew_std": 1135.7738551313814, "Agent": "ppo"}, {"env_step": 7700000, "rew": 8585.25, "rew_std": 1348.3320483100592, "Agent": "ppo"}, {"env_step": 7800000, "rew": 8759.25, "rew_std": 1379.0055520192802, "Agent": "ppo"}, {"env_step": 7900000, "rew": 9218.5, "rew_std": 1970.6262329523577, "Agent": "ppo"}, {"env_step": 8000000, "rew": 9573.25, "rew_std": 1635.5530601298144, "Agent": "ppo"}, {"env_step": 8100000, "rew": 10431.25, "rew_std": 1564.9469839262927, "Agent": "ppo"}, {"env_step": 8200000, "rew": 9307.5, "rew_std": 1389.4486316521384, "Agent": "ppo"}, {"env_step": 8300000, "rew": 9908.75, "rew_std": 1632.5357002222033, "Agent": "ppo"}, {"env_step": 8400000, "rew": 10750.5, "rew_std": 2245.378531562106, "Agent": "ppo"}, {"env_step": 8500000, "rew": 10358.5, "rew_std": 2260.2992611599025, "Agent": "ppo"}, {"env_step": 8600000, "rew": 10700.25, "rew_std": 1594.5439982954374, "Agent": "ppo"}, {"env_step": 8700000, "rew": 10038.25, "rew_std": 1889.7635599460584, "Agent": "ppo"}, {"env_step": 8800000, "rew": 9823.0, "rew_std": 1878.6184418343178, "Agent": 
"ppo"}, {"env_step": 8900000, "rew": 10836.5, "rew_std": 1715.2179890614486, "Agent": "ppo"}, {"env_step": 9000000, "rew": 10589.0, "rew_std": 1656.9747282321478, "Agent": "ppo"}, {"env_step": 9100000, "rew": 10209.75, "rew_std": 1596.2845336906576, "Agent": "ppo"}, {"env_step": 9200000, "rew": 11638.75, "rew_std": 2334.8370526655603, "Agent": "ppo"}, {"env_step": 9300000, "rew": 11236.5, "rew_std": 1308.257046608196, "Agent": "ppo"}, {"env_step": 9400000, "rew": 12341.75, "rew_std": 1760.6944830094742, "Agent": "ppo"}, {"env_step": 9500000, "rew": 11866.0, "rew_std": 1635.246617486182, "Agent": "ppo"}, {"env_step": 9600000, "rew": 11265.5, "rew_std": 1304.1528859761804, "Agent": "ppo"}, {"env_step": 9700000, "rew": 11678.5, "rew_std": 1495.5292541438298, "Agent": "ppo"}, {"env_step": 9800000, "rew": 11504.25, "rew_std": 1666.65422703691, "Agent": "ppo"}, {"env_step": 9900000, "rew": 11494.0, "rew_std": 1494.6768881601133, "Agent": "ppo"}, {"env_step": 10000000, "rew": 12188.5, "rew_std": 1292.4967117946567, "Agent": "ppo"}]
examples/atari/benchmark/SeaquestNoFrameskip-v4/result.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"env_step": 0, "rew": 32.2, "rew_std": 46.315872009495834, "Agent": "c51"}, {"env_step": 100000, "rew": 150.4, "rew_std": 44.16152171291203, "Agent": "c51"}, {"env_step": 200000, "rew": 128.6, "rew_std": 49.11252386102755, "Agent": "c51"}, {"env_step": 300000, "rew": 247.0, "rew_std": 99.90095094642493, "Agent": "c51"}, {"env_step": 400000, "rew": 316.8, "rew_std": 101.15809409038903, "Agent": "c51"}, {"env_step": 500000, "rew": 294.4, "rew_std": 154.54138604270378, "Agent": "c51"}, {"env_step": 600000, "rew": 319.4, "rew_std": 168.48513287527777, "Agent": "c51"}, {"env_step": 700000, "rew": 447.6, "rew_std": 228.1434636363707, "Agent": "c51"}, {"env_step": 800000, "rew": 584.0, "rew_std": 225.6138293633615, "Agent": "c51"}, {"env_step": 900000, "rew": 728.2, "rew_std": 275.2329195427029, "Agent": "c51"}, {"env_step": 1000000, "rew": 972.4, "rew_std": 346.5323072961596, "Agent": "c51"}, {"env_step": 1100000, "rew": 1153.0, "rew_std": 393.3764100705582, "Agent": "c51"}, {"env_step": 1200000, "rew": 1589.2, "rew_std": 267.4624459620453, "Agent": "c51"}, {"env_step": 1300000, "rew": 1583.4, "rew_std": 262.64508371564847, "Agent": "c51"}, {"env_step": 1400000, "rew": 1678.6, "rew_std": 221.38482332806828, "Agent": "c51"}, {"env_step": 1500000, "rew": 1636.2, "rew_std": 262.5969535238366, "Agent": "c51"}, {"env_step": 1600000, "rew": 1672.2, "rew_std": 191.08835652650322, "Agent": "c51"}, {"env_step": 1700000, "rew": 1610.6, "rew_std": 330.96591969566896, "Agent": "c51"}, {"env_step": 1800000, "rew": 1730.3, "rew_std": 421.8265164733009, "Agent": "c51"}, {"env_step": 1900000, "rew": 1915.1, "rew_std": 466.08463823644735, "Agent": "c51"}, {"env_step": 2000000, "rew": 1765.0, "rew_std": 223.29218526406157, "Agent": "c51"}, {"env_step": 2100000, "rew": 1774.2, "rew_std": 280.23483009790203, "Agent": "c51"}, {"env_step": 2200000, "rew": 1940.0, "rew_std": 288.7337874236405, "Agent": "c51"}, {"env_step": 2300000, "rew": 1953.0, "rew_std": 183.59793027155834, "Agent": 
"c51"}, {"env_step": 2400000, "rew": 1960.0, "rew_std": 160.45186193996005, "Agent": "c51"}, {"env_step": 2500000, "rew": 1876.8, "rew_std": 178.058866670548, "Agent": "c51"}, {"env_step": 2600000, "rew": 1938.3, "rew_std": 339.1395140646398, "Agent": "c51"}, {"env_step": 2700000, "rew": 2020.2, "rew_std": 316.60505365518094, "Agent": "c51"}, {"env_step": 2800000, "rew": 1987.6, "rew_std": 300.36351309704713, "Agent": "c51"}, {"env_step": 2900000, "rew": 1866.7, "rew_std": 246.95750646619348, "Agent": "c51"}, {"env_step": 3000000, "rew": 1934.9, "rew_std": 380.61383316952634, "Agent": "c51"}, {"env_step": 3100000, "rew": 2063.6, "rew_std": 397.0740988782824, "Agent": "c51"}, {"env_step": 3200000, "rew": 2049.0, "rew_std": 507.7331976540435, "Agent": "c51"}, {"env_step": 3300000, "rew": 2166.0, "rew_std": 523.4004203284518, "Agent": "c51"}, {"env_step": 3400000, "rew": 2154.2, "rew_std": 581.4135877325194, "Agent": "c51"}, {"env_step": 3500000, "rew": 2041.9, "rew_std": 658.6993927430024, "Agent": "c51"}, {"env_step": 3600000, "rew": 2267.9, "rew_std": 511.7625328216203, "Agent": "c51"}, {"env_step": 3700000, "rew": 2240.1, "rew_std": 415.064440779983, "Agent": "c51"}, {"env_step": 3800000, "rew": 2300.7, "rew_std": 460.17693336367915, "Agent": "c51"}, {"env_step": 3900000, "rew": 2148.5, "rew_std": 605.8113980439787, "Agent": "c51"}, {"env_step": 4000000, "rew": 2083.7, "rew_std": 491.9182960614496, "Agent": "c51"}, {"env_step": 4100000, "rew": 2218.3, "rew_std": 504.38795584351533, "Agent": "c51"}, {"env_step": 4200000, "rew": 2268.6, "rew_std": 484.98725756456736, "Agent": "c51"}, {"env_step": 4300000, "rew": 2227.8, "rew_std": 526.6742446712199, "Agent": "c51"}, {"env_step": 4400000, "rew": 2411.4, "rew_std": 649.1550200067777, "Agent": "c51"}, {"env_step": 4500000, "rew": 2175.5, "rew_std": 458.5758933917046, "Agent": "c51"}, {"env_step": 4600000, "rew": 2318.9, "rew_std": 604.9889998999981, "Agent": "c51"}, {"env_step": 4700000, "rew": 2327.4, "rew_std": 
395.89195495741006, "Agent": "c51"}, {"env_step": 4800000, "rew": 2369.6, "rew_std": 508.4897639087733, "Agent": "c51"}, {"env_step": 4900000, "rew": 2172.9, "rew_std": 587.6421445063313, "Agent": "c51"}, {"env_step": 5000000, "rew": 2279.5, "rew_std": 422.98540163934734, "Agent": "c51"}, {"env_step": 5100000, "rew": 2513.7, "rew_std": 757.7894232568834, "Agent": "c51"}, {"env_step": 5200000, "rew": 2347.0, "rew_std": 447.44988546204814, "Agent": "c51"}, {"env_step": 5300000, "rew": 2241.3, "rew_std": 527.0457380531599, "Agent": "c51"}, {"env_step": 5400000, "rew": 2434.3, "rew_std": 574.2675421787305, "Agent": "c51"}, {"env_step": 5500000, "rew": 2543.6, "rew_std": 672.8527624971157, "Agent": "c51"}, {"env_step": 5600000, "rew": 2479.0, "rew_std": 647.793794351258, "Agent": "c51"}, {"env_step": 5700000, "rew": 2461.7, "rew_std": 558.4800891705988, "Agent": "c51"}, {"env_step": 5800000, "rew": 2491.6, "rew_std": 587.2570476375741, "Agent": "c51"}, {"env_step": 5900000, "rew": 2470.1, "rew_std": 801.6780463502789, "Agent": "c51"}, {"env_step": 6000000, "rew": 2324.7, "rew_std": 429.707353904957, "Agent": "c51"}, {"env_step": 6100000, "rew": 2391.7, "rew_std": 472.14575080159307, "Agent": "c51"}, {"env_step": 6200000, "rew": 2537.3, "rew_std": 751.1694948545235, "Agent": "c51"}, {"env_step": 6300000, "rew": 2379.9, "rew_std": 653.9957874482068, "Agent": "c51"}, {"env_step": 6400000, "rew": 2658.8, "rew_std": 892.9185629160143, "Agent": "c51"}, {"env_step": 6500000, "rew": 2667.7, "rew_std": 955.2983879396008, "Agent": "c51"}, {"env_step": 6600000, "rew": 2439.5, "rew_std": 665.1733984458489, "Agent": "c51"}, {"env_step": 6700000, "rew": 2414.2, "rew_std": 540.237318222279, "Agent": "c51"}, {"env_step": 6800000, "rew": 2644.7, "rew_std": 830.3852178356741, "Agent": "c51"}, {"env_step": 6900000, "rew": 2430.4, "rew_std": 685.8056867655736, "Agent": "c51"}, {"env_step": 7000000, "rew": 2793.1, "rew_std": 1111.4479250059358, "Agent": "c51"}, {"env_step": 7100000, "rew": 
2576.3, "rew_std": 875.817338261809, "Agent": "c51"}, {"env_step": 7200000, "rew": 2954.8, "rew_std": 1122.580758787536, "Agent": "c51"}, {"env_step": 7300000, "rew": 2826.6, "rew_std": 955.3017533742938, "Agent": "c51"}, {"env_step": 7400000, "rew": 2654.4, "rew_std": 863.3571914335341, "Agent": "c51"}, {"env_step": 7500000, "rew": 2642.0, "rew_std": 993.2044099781273, "Agent": "c51"}, {"env_step": 7600000, "rew": 2686.2, "rew_std": 1022.8722109823885, "Agent": "c51"}, {"env_step": 7700000, "rew": 2804.2, "rew_std": 1017.5520428951043, "Agent": "c51"}, {"env_step": 7800000, "rew": 2618.4, "rew_std": 754.956846448855, "Agent": "c51"}, {"env_step": 7900000, "rew": 2661.4, "rew_std": 895.5007761023996, "Agent": "c51"}, {"env_step": 8000000, "rew": 2549.6, "rew_std": 666.4779366190602, "Agent": "c51"}, {"env_step": 8100000, "rew": 2469.4, "rew_std": 947.9606742898145, "Agent": "c51"}, {"env_step": 8200000, "rew": 2505.2, "rew_std": 674.0418087923033, "Agent": "c51"}, {"env_step": 8300000, "rew": 2662.4, "rew_std": 742.0776509234057, "Agent": "c51"}, {"env_step": 8400000, "rew": 2667.8, "rew_std": 823.5222887086907, "Agent": "c51"}, {"env_step": 8500000, "rew": 2946.4, "rew_std": 1133.8231960936414, "Agent": "c51"}, {"env_step": 8600000, "rew": 2712.2, "rew_std": 895.4735953672783, "Agent": "c51"}, {"env_step": 8700000, "rew": 2573.9, "rew_std": 830.3705739005928, "Agent": "c51"}, {"env_step": 8800000, "rew": 2695.3, "rew_std": 1011.963146562166, "Agent": "c51"}, {"env_step": 8900000, "rew": 2988.3, "rew_std": 1189.3779929021725, "Agent": "c51"}, {"env_step": 9000000, "rew": 3090.7, "rew_std": 1242.8095630465675, "Agent": "c51"}, {"env_step": 9100000, "rew": 2933.6, "rew_std": 1181.3922464617751, "Agent": "c51"}, {"env_step": 9200000, "rew": 2749.2, "rew_std": 1097.8828534957636, "Agent": "c51"}, {"env_step": 9300000, "rew": 2900.2, "rew_std": 1171.506961140223, "Agent": "c51"}, {"env_step": 9400000, "rew": 2628.8, "rew_std": 762.4866949658859, "Agent": "c51"}, 
{"env_step": 9500000, "rew": 2926.8, "rew_std": 1080.2360667928099, "Agent": "c51"}, {"env_step": 9600000, "rew": 2832.0, "rew_std": 1037.878316567024, "Agent": "c51"}, {"env_step": 9700000, "rew": 3305.4, "rew_std": 1524.3043790529503, "Agent": "c51"}, {"env_step": 9800000, "rew": 2810.2, "rew_std": 1373.4536613952434, "Agent": "c51"}, {"env_step": 9900000, "rew": 2678.6, "rew_std": 794.8096879127733, "Agent": "c51"}, {"env_step": 10000000, "rew": 2879.0, "rew_std": 1374.2044243852513, "Agent": "c51"}, {"env_step": 0, "rew": 67.6, "rew_std": 52.75452587219413, "Agent": "dqn"}, {"env_step": 100000, "rew": 221.0, "rew_std": 43.148580509676094, "Agent": "dqn"}, {"env_step": 200000, "rew": 284.2, "rew_std": 53.54960317313286, "Agent": "dqn"}, {"env_step": 300000, "rew": 255.8, "rew_std": 84.65671857566888, "Agent": "dqn"}, {"env_step": 400000, "rew": 283.4, "rew_std": 66.36294146585125, "Agent": "dqn"}, {"env_step": 500000, "rew": 266.6, "rew_std": 59.49151199961218, "Agent": "dqn"}, {"env_step": 600000, "rew": 290.4, "rew_std": 82.25715774326268, "Agent": "dqn"}, {"env_step": 700000, "rew": 346.2, "rew_std": 104.86353036208536, "Agent": "dqn"}, {"env_step": 800000, "rew": 407.4, "rew_std": 125.18801859603019, "Agent": "dqn"}, {"env_step": 900000, "rew": 519.8, "rew_std": 125.2436026310326, "Agent": "dqn"}, {"env_step": 1000000, "rew": 500.8, "rew_std": 113.11304080432106, "Agent": "dqn"}, {"env_step": 1100000, "rew": 857.6, "rew_std": 217.36200219909645, "Agent": "dqn"}, {"env_step": 1200000, "rew": 909.4, "rew_std": 323.0765234429763, "Agent": "dqn"}, {"env_step": 1300000, "rew": 1074.6, "rew_std": 300.64337677720425, "Agent": "dqn"}, {"env_step": 1400000, "rew": 1264.8, "rew_std": 315.2956707600027, "Agent": "dqn"}, {"env_step": 1500000, "rew": 1273.2, "rew_std": 365.40629441759756, "Agent": "dqn"}, {"env_step": 1600000, "rew": 1206.4, "rew_std": 444.22498804097006, "Agent": "dqn"}, {"env_step": 1700000, "rew": 1501.1, "rew_std": 372.9220964222957, "Agent": "dqn"}, 
{"env_step": 1800000, "rew": 1625.4, "rew_std": 438.0347474801514, "Agent": "dqn"}, {"env_step": 1900000, "rew": 1565.2, "rew_std": 472.6666478608365, "Agent": "dqn"}, {"env_step": 2000000, "rew": 1754.0, "rew_std": 312.49191989553907, "Agent": "dqn"}, {"env_step": 2100000, "rew": 1821.2, "rew_std": 352.37389233596747, "Agent": "dqn"}, {"env_step": 2200000, "rew": 1993.0, "rew_std": 548.2636227217706, "Agent": "dqn"}, {"env_step": 2300000, "rew": 1839.2, "rew_std": 397.0201002468263, "Agent": "dqn"}, {"env_step": 2400000, "rew": 2161.7, "rew_std": 328.52276937831874, "Agent": "dqn"}, {"env_step": 2500000, "rew": 2045.4, "rew_std": 817.1477467386177, "Agent": "dqn"}, {"env_step": 2600000, "rew": 1969.4, "rew_std": 666.9255130822332, "Agent": "dqn"}, {"env_step": 2700000, "rew": 2051.1, "rew_std": 532.0207608731073, "Agent": "dqn"}, {"env_step": 2800000, "rew": 2071.5, "rew_std": 529.4195406291686, "Agent": "dqn"}, {"env_step": 2900000, "rew": 1928.8, "rew_std": 357.8560604488905, "Agent": "dqn"}, {"env_step": 3000000, "rew": 2327.6, "rew_std": 610.7462975737143, "Agent": "dqn"}, {"env_step": 3100000, "rew": 2295.2, "rew_std": 519.696988638572, "Agent": "dqn"}, {"env_step": 3200000, "rew": 1959.3, "rew_std": 419.5507239893646, "Agent": "dqn"}, {"env_step": 3300000, "rew": 2432.6, "rew_std": 510.47276127135325, "Agent": "dqn"}, {"env_step": 3400000, "rew": 2435.4, "rew_std": 451.0752043728407, "Agent": "dqn"}, {"env_step": 3500000, "rew": 2519.0, "rew_std": 417.8040210433595, "Agent": "dqn"}, {"env_step": 3600000, "rew": 2485.6, "rew_std": 568.3300449562736, "Agent": "dqn"}, {"env_step": 3700000, "rew": 2359.6, "rew_std": 628.5970410366247, "Agent": "dqn"}, {"env_step": 3800000, "rew": 2478.4, "rew_std": 378.3150010242787, "Agent": "dqn"}, {"env_step": 3900000, "rew": 2657.6, "rew_std": 525.0781275200862, "Agent": "dqn"}, {"env_step": 4000000, "rew": 2616.8, "rew_std": 352.63715062369704, "Agent": "dqn"}, {"env_step": 4100000, "rew": 2332.2, "rew_std": 
373.4396336759129, "Agent": "dqn"}, {"env_step": 4200000, "rew": 2553.5, "rew_std": 363.4631343066309, "Agent": "dqn"}, {"env_step": 4300000, "rew": 2390.0, "rew_std": 644.888207366207, "Agent": "dqn"}, {"env_step": 4400000, "rew": 2727.2, "rew_std": 635.8051273778783, "Agent": "dqn"}, {"env_step": 4500000, "rew": 2780.6, "rew_std": 470.3411953040048, "Agent": "dqn"}, {"env_step": 4600000, "rew": 2597.2, "rew_std": 523.4158576122813, "Agent": "dqn"}, {"env_step": 4700000, "rew": 2602.5, "rew_std": 744.5147748701835, "Agent": "dqn"}, {"env_step": 4800000, "rew": 2417.0, "rew_std": 749.8470510710835, "Agent": "dqn"}, {"env_step": 4900000, "rew": 2945.2, "rew_std": 587.9389083909995, "Agent": "dqn"}, {"env_step": 5000000, "rew": 2675.3, "rew_std": 784.2958689168265, "Agent": "dqn"}, {"env_step": 5100000, "rew": 2855.4, "rew_std": 1029.4697858606633, "Agent": "dqn"}, {"env_step": 5200000, "rew": 2557.8, "rew_std": 957.7696800379516, "Agent": "dqn"}, {"env_step": 5300000, "rew": 2583.4, "rew_std": 686.686566054703, "Agent": "dqn"}, {"env_step": 5400000, "rew": 2643.4, "rew_std": 625.6280364561678, "Agent": "dqn"}, {"env_step": 5500000, "rew": 2624.8, "rew_std": 485.9606568437408, "Agent": "dqn"}, {"env_step": 5600000, "rew": 2627.2, "rew_std": 482.2772646517769, "Agent": "dqn"}, {"env_step": 5700000, "rew": 2659.2, "rew_std": 828.4439389602655, "Agent": "dqn"}, {"env_step": 5800000, "rew": 2599.4, "rew_std": 550.9120074930297, "Agent": "dqn"}, {"env_step": 5900000, "rew": 2938.3, "rew_std": 744.541207724596, "Agent": "dqn"}, {"env_step": 6000000, "rew": 2851.0, "rew_std": 557.9026796852655, "Agent": "dqn"}, {"env_step": 6100000, "rew": 2454.4, "rew_std": 921.5052034579078, "Agent": "dqn"}, {"env_step": 6200000, "rew": 2610.6, "rew_std": 869.3878536073529, "Agent": "dqn"}, {"env_step": 6300000, "rew": 2773.0, "rew_std": 432.34268815373764, "Agent": "dqn"}, {"env_step": 6400000, "rew": 2506.0, "rew_std": 803.5655542642429, "Agent": "dqn"}, {"env_step": 6500000, "rew": 
2808.7, "rew_std": 689.5932206743335, "Agent": "dqn"}, {"env_step": 6600000, "rew": 2985.2, "rew_std": 595.4958941923949, "Agent": "dqn"}, {"env_step": 6700000, "rew": 2698.0, "rew_std": 634.7957151714243, "Agent": "dqn"}, {"env_step": 6800000, "rew": 2821.2, "rew_std": 647.6642339978331, "Agent": "dqn"}, {"env_step": 6900000, "rew": 2988.2, "rew_std": 699.9722565930738, "Agent": "dqn"}, {"env_step": 7000000, "rew": 2854.4, "rew_std": 386.69864235603416, "Agent": "dqn"}, {"env_step": 7100000, "rew": 2749.0, "rew_std": 739.4631836677199, "Agent": "dqn"}, {"env_step": 7200000, "rew": 2854.4, "rew_std": 721.7993072870048, "Agent": "dqn"}, {"env_step": 7300000, "rew": 2570.2, "rew_std": 562.2785430727372, "Agent": "dqn"}, {"env_step": 7400000, "rew": 2909.4, "rew_std": 663.843385144418, "Agent": "dqn"}, {"env_step": 7500000, "rew": 2631.1, "rew_std": 731.3366461486802, "Agent": "dqn"}, {"env_step": 7600000, "rew": 2852.2, "rew_std": 665.8404914091662, "Agent": "dqn"}, {"env_step": 7700000, "rew": 2876.5, "rew_std": 423.60199480172423, "Agent": "dqn"}, {"env_step": 7800000, "rew": 2636.4, "rew_std": 778.3766697428694, "Agent": "dqn"}, {"env_step": 7900000, "rew": 2651.3, "rew_std": 599.0412423197588, "Agent": "dqn"}, {"env_step": 8000000, "rew": 2770.2, "rew_std": 600.3961692082987, "Agent": "dqn"}, {"env_step": 8100000, "rew": 2965.0, "rew_std": 660.43697655416, "Agent": "dqn"}, {"env_step": 8200000, "rew": 2998.4, "rew_std": 484.83795230984134, "Agent": "dqn"}, {"env_step": 8300000, "rew": 2604.2, "rew_std": 553.038479673883, "Agent": "dqn"}, {"env_step": 8400000, "rew": 2286.0, "rew_std": 568.7192629056976, "Agent": "dqn"}, {"env_step": 8500000, "rew": 2715.2, "rew_std": 530.8809282692307, "Agent": "dqn"}, {"env_step": 8600000, "rew": 2736.2, "rew_std": 531.8183524475251, "Agent": "dqn"}, {"env_step": 8700000, "rew": 2767.8, "rew_std": 546.1792379796215, "Agent": "dqn"}, {"env_step": 8800000, "rew": 2634.8, "rew_std": 725.9069912874514, "Agent": "dqn"}, {"env_step": 
8900000, "rew": 2286.2, "rew_std": 622.9314247973047, "Agent": "dqn"}, {"env_step": 9000000, "rew": 2815.0, "rew_std": 796.1378021423176, "Agent": "dqn"}, {"env_step": 9100000, "rew": 2723.2, "rew_std": 613.141549725673, "Agent": "dqn"}, {"env_step": 9200000, "rew": 2820.4, "rew_std": 687.1970896329524, "Agent": "dqn"}, {"env_step": 9300000, "rew": 2704.2, "rew_std": 625.2215287400139, "Agent": "dqn"}, {"env_step": 9400000, "rew": 2331.2, "rew_std": 761.4608066079304, "Agent": "dqn"}, {"env_step": 9500000, "rew": 2712.7, "rew_std": 589.839308625663, "Agent": "dqn"}, {"env_step": 9600000, "rew": 2890.0, "rew_std": 690.3222435935264, "Agent": "dqn"}, {"env_step": 9700000, "rew": 2330.1, "rew_std": 573.4458038908298, "Agent": "dqn"}, {"env_step": 9800000, "rew": 2720.6, "rew_std": 1005.5040725924486, "Agent": "dqn"}, {"env_step": 9900000, "rew": 3213.9, "rew_std": 381.56741213054346, "Agent": "dqn"}, {"env_step": 10000000, "rew": 2365.6, "rew_std": 703.0867940731073, "Agent": "dqn"}, {"env_step": 0, "rew": 84.0, "rew_std": 39.97999499749844, "Agent": "fqf"}, {"env_step": 100000, "rew": 235.4, "rew_std": 48.70359329659363, "Agent": "fqf"}, {"env_step": 200000, "rew": 270.2, "rew_std": 64.16509954796298, "Agent": "fqf"}, {"env_step": 300000, "rew": 268.0, "rew_std": 43.174066289845804, "Agent": "fqf"}, {"env_step": 400000, "rew": 273.4, "rew_std": 98.09403651598807, "Agent": "fqf"}, {"env_step": 500000, "rew": 311.6, "rew_std": 46.21514903145937, "Agent": "fqf"}, {"env_step": 600000, "rew": 390.6, "rew_std": 107.00299061241232, "Agent": "fqf"}, {"env_step": 700000, "rew": 513.8, "rew_std": 207.3131930196436, "Agent": "fqf"}, {"env_step": 800000, "rew": 677.2, "rew_std": 171.06536762302298, "Agent": "fqf"}, {"env_step": 900000, "rew": 902.2, "rew_std": 367.83088505453156, "Agent": "fqf"}, {"env_step": 1000000, "rew": 1180.2, "rew_std": 368.2884195844338, "Agent": "fqf"}, {"env_step": 1100000, "rew": 1722.4, "rew_std": 516.4639774466366, "Agent": "fqf"}, {"env_step": 
1200000, "rew": 2106.6, "rew_std": 570.7236108660653, "Agent": "fqf"}, {"env_step": 1300000, "rew": 2475.0, "rew_std": 793.9652385337787, "Agent": "fqf"}, {"env_step": 1400000, "rew": 2825.5, "rew_std": 731.4583036646724, "Agent": "fqf"}, {"env_step": 1500000, "rew": 3100.2, "rew_std": 422.5548011796813, "Agent": "fqf"}, {"env_step": 1600000, "rew": 3458.9, "rew_std": 911.015197458308, "Agent": "fqf"}, {"env_step": 1700000, "rew": 3497.4, "rew_std": 772.6115712309776, "Agent": "fqf"}, {"env_step": 1800000, "rew": 3650.5, "rew_std": 925.8434262876202, "Agent": "fqf"}, {"env_step": 1900000, "rew": 3701.9, "rew_std": 668.0494667313193, "Agent": "fqf"}, {"env_step": 2000000, "rew": 3597.7, "rew_std": 658.9843776600474, "Agent": "fqf"}, {"env_step": 2100000, "rew": 3653.4, "rew_std": 609.672239814148, "Agent": "fqf"}, {"env_step": 2200000, "rew": 4249.8, "rew_std": 837.1662678345324, "Agent": "fqf"}, {"env_step": 2300000, "rew": 4032.9, "rew_std": 788.8272878140056, "Agent": "fqf"}, {"env_step": 2400000, "rew": 4410.0, "rew_std": 802.1447500295691, "Agent": "fqf"}, {"env_step": 2500000, "rew": 4966.7, "rew_std": 1177.294359962707, "Agent": "fqf"}, {"env_step": 2600000, "rew": 4576.1, "rew_std": 841.7940900243955, "Agent": "fqf"}, {"env_step": 2700000, "rew": 5155.4, "rew_std": 1126.5631984047766, "Agent": "fqf"}, {"env_step": 2800000, "rew": 5071.3, "rew_std": 472.3333674429534, "Agent": "fqf"}, {"env_step": 2900000, "rew": 4688.0, "rew_std": 717.3926400514574, "Agent": "fqf"}, {"env_step": 3000000, "rew": 4985.2, "rew_std": 726.9461878296082, "Agent": "fqf"}, {"env_step": 3100000, "rew": 4975.1, "rew_std": 585.9778920744366, "Agent": "fqf"}, {"env_step": 3200000, "rew": 4920.8, "rew_std": 1034.2605861193783, "Agent": "fqf"}, {"env_step": 3300000, "rew": 5047.4, "rew_std": 724.611233697077, "Agent": "fqf"}, {"env_step": 3400000, "rew": 5616.9, "rew_std": 1700.5169478720288, "Agent": "fqf"}, {"env_step": 3500000, "rew": 5794.7, "rew_std": 1492.3058031114133, "Agent": 
"fqf"}, {"env_step": 3600000, "rew": 5340.5, "rew_std": 1678.8342532841054, "Agent": "fqf"}, {"env_step": 3700000, "rew": 5262.5, "rew_std": 1011.5026692994933, "Agent": "fqf"}, {"env_step": 3800000, "rew": 5265.4, "rew_std": 708.2388297742507, "Agent": "fqf"}, {"env_step": 3900000, "rew": 5469.6, "rew_std": 858.3154664807107, "Agent": "fqf"}, {"env_step": 4000000, "rew": 6005.2, "rew_std": 1882.1700667049192, "Agent": "fqf"}, {"env_step": 4100000, "rew": 5602.9, "rew_std": 1134.739304862575, "Agent": "fqf"}, {"env_step": 4200000, "rew": 5792.3, "rew_std": 707.0452672919888, "Agent": "fqf"}, {"env_step": 4300000, "rew": 5279.0, "rew_std": 1276.4659807452763, "Agent": "fqf"}, {"env_step": 4400000, "rew": 5126.0, "rew_std": 1668.219589862198, "Agent": "fqf"}, {"env_step": 4500000, "rew": 5870.5, "rew_std": 1084.202771625308, "Agent": "fqf"}, {"env_step": 4600000, "rew": 5440.8, "rew_std": 1613.5675876764506, "Agent": "fqf"}, {"env_step": 4700000, "rew": 5901.3, "rew_std": 586.3447876463131, "Agent": "fqf"}, {"env_step": 4800000, "rew": 5909.6, "rew_std": 1153.2979840440196, "Agent": "fqf"}, {"env_step": 4900000, "rew": 6558.0, "rew_std": 1928.9374277046936, "Agent": "fqf"}, {"env_step": 5000000, "rew": 6140.0, "rew_std": 1449.9375848635693, "Agent": "fqf"}, {"env_step": 5100000, "rew": 6061.0, "rew_std": 844.278626994667, "Agent": "fqf"}, {"env_step": 5200000, "rew": 5817.9, "rew_std": 983.7778661872811, "Agent": "fqf"}, {"env_step": 5300000, "rew": 6269.0, "rew_std": 660.6495288729116, "Agent": "fqf"}, {"env_step": 5400000, "rew": 5512.1, "rew_std": 1459.216464408211, "Agent": "fqf"}, {"env_step": 5500000, "rew": 5616.9, "rew_std": 1634.4490478445637, "Agent": "fqf"}, {"env_step": 5600000, "rew": 6840.6, "rew_std": 1181.8537303744486, "Agent": "fqf"}, {"env_step": 5700000, "rew": 6313.4, "rew_std": 1765.277666544275, "Agent": "fqf"}, {"env_step": 5800000, "rew": 6400.5, "rew_std": 2038.3985012749592, "Agent": "fqf"}, {"env_step": 5900000, "rew": 6898.0, "rew_std": 
1592.9492145074807, "Agent": "fqf"}, {"env_step": 6000000, "rew": 6413.2, "rew_std": 2133.357813401212, "Agent": "fqf"}, {"env_step": 6100000, "rew": 6410.2, "rew_std": 1778.3248184738356, "Agent": "fqf"}, {"env_step": 6200000, "rew": 6357.5, "rew_std": 1512.9386140884897, "Agent": "fqf"}, {"env_step": 6300000, "rew": 6276.0, "rew_std": 815.767613968586, "Agent": "fqf"}, {"env_step": 6400000, "rew": 6026.5, "rew_std": 1442.6516038184686, "Agent": "fqf"}, {"env_step": 6500000, "rew": 6285.0, "rew_std": 1306.420376448561, "Agent": "fqf"}, {"env_step": 6600000, "rew": 6946.2, "rew_std": 1895.469588255111, "Agent": "fqf"}, {"env_step": 6700000, "rew": 6952.1, "rew_std": 1505.6863185936172, "Agent": "fqf"}, {"env_step": 6800000, "rew": 6325.1, "rew_std": 1797.747893893913, "Agent": "fqf"}, {"env_step": 6900000, "rew": 6713.2, "rew_std": 1581.9958154179803, "Agent": "fqf"}, {"env_step": 7000000, "rew": 6725.5, "rew_std": 1307.073238192872, "Agent": "fqf"}, {"env_step": 7100000, "rew": 6847.9, "rew_std": 1273.460596170922, "Agent": "fqf"}, {"env_step": 7200000, "rew": 7050.2, "rew_std": 1556.5933187573432, "Agent": "fqf"}, {"env_step": 7300000, "rew": 6831.8, "rew_std": 1364.3794047111676, "Agent": "fqf"}, {"env_step": 7400000, "rew": 6303.4, "rew_std": 1708.135252256097, "Agent": "fqf"}, {"env_step": 7500000, "rew": 7570.5, "rew_std": 2275.7164695980914, "Agent": "fqf"}, {"env_step": 7600000, "rew": 7652.3, "rew_std": 2182.971646632177, "Agent": "fqf"}, {"env_step": 7700000, "rew": 7493.9, "rew_std": 2103.570604947692, "Agent": "fqf"}, {"env_step": 7800000, "rew": 7694.6, "rew_std": 2340.8724954597587, "Agent": "fqf"}, {"env_step": 7900000, "rew": 6932.5, "rew_std": 1200.7625285625797, "Agent": "fqf"}, {"env_step": 8000000, "rew": 7276.4, "rew_std": 1941.6771204296558, "Agent": "fqf"}, {"env_step": 8100000, "rew": 6880.9, "rew_std": 1708.650546483979, "Agent": "fqf"}, {"env_step": 8200000, "rew": 6877.8, "rew_std": 1889.6905460947833, "Agent": "fqf"}, {"env_step": 
8300000, "rew": 6632.9, "rew_std": 1580.6722905143872, "Agent": "fqf"}, {"env_step": 8400000, "rew": 7083.5, "rew_std": 1896.2999894531456, "Agent": "fqf"}, {"env_step": 8500000, "rew": 6696.8, "rew_std": 2655.081648462058, "Agent": "fqf"}, {"env_step": 8600000, "rew": 7298.9, "rew_std": 2318.328382693013, "Agent": "fqf"}, {"env_step": 8700000, "rew": 6763.7, "rew_std": 1158.4843589794382, "Agent": "fqf"}, {"env_step": 8800000, "rew": 7196.0, "rew_std": 1865.8348265588784, "Agent": "fqf"}, {"env_step": 8900000, "rew": 6880.7, "rew_std": 1600.4205728495244, "Agent": "fqf"}, {"env_step": 9000000, "rew": 7794.2, "rew_std": 2350.0790965412207, "Agent": "fqf"}, {"env_step": 9100000, "rew": 7289.3, "rew_std": 1832.8727206219203, "Agent": "fqf"}, {"env_step": 9200000, "rew": 6713.8, "rew_std": 1709.8212070272143, "Agent": "fqf"}, {"env_step": 9300000, "rew": 7391.5, "rew_std": 2495.9832631650397, "Agent": "fqf"}, {"env_step": 9400000, "rew": 7061.4, "rew_std": 1013.7232561207226, "Agent": "fqf"}, {"env_step": 9500000, "rew": 7424.5, "rew_std": 2155.5881911905158, "Agent": "fqf"}, {"env_step": 9600000, "rew": 7426.3, "rew_std": 1927.6538615633253, "Agent": "fqf"}, {"env_step": 9700000, "rew": 7352.0, "rew_std": 1948.6867372669215, "Agent": "fqf"}, {"env_step": 9800000, "rew": 7327.9, "rew_std": 1429.7993880261663, "Agent": "fqf"}, {"env_step": 9900000, "rew": 8051.5, "rew_std": 3155.5843912023647, "Agent": "fqf"}, {"env_step": 10000000, "rew": 6903.5, "rew_std": 1400.5262046816547, "Agent": "fqf"}, {"env_step": 0, "rew": 45.4, "rew_std": 52.91540418441496, "Agent": "qrdqn"}, {"env_step": 100000, "rew": 200.0, "rew_std": 31.41973901864877, "Agent": "qrdqn"}, {"env_step": 200000, "rew": 289.4, "rew_std": 83.21562353308423, "Agent": "qrdqn"}, {"env_step": 300000, "rew": 258.4, "rew_std": 86.97493891920823, "Agent": "qrdqn"}, {"env_step": 400000, "rew": 267.0, "rew_std": 81.28591513909406, "Agent": "qrdqn"}, {"env_step": 500000, "rew": 300.6, "rew_std": 89.50553055537965, 
"Agent": "qrdqn"}, {"env_step": 600000, "rew": 325.2, "rew_std": 81.07379354637354, "Agent": "qrdqn"}, {"env_step": 700000, "rew": 408.6, "rew_std": 74.08670595997638, "Agent": "qrdqn"}, {"env_step": 800000, "rew": 465.0, "rew_std": 195.36273953853123, "Agent": "qrdqn"}, {"env_step": 900000, "rew": 629.4, "rew_std": 227.03488718696954, "Agent": "qrdqn"}, {"env_step": 1000000, "rew": 899.2, "rew_std": 221.7118851121879, "Agent": "qrdqn"}, {"env_step": 1100000, "rew": 1039.5, "rew_std": 408.2810918962572, "Agent": "qrdqn"}, {"env_step": 1200000, "rew": 1266.2, "rew_std": 453.6681165786284, "Agent": "qrdqn"}, {"env_step": 1300000, "rew": 1240.1, "rew_std": 317.7037771258, "Agent": "qrdqn"}, {"env_step": 1400000, "rew": 1547.9, "rew_std": 501.95188016382605, "Agent": "qrdqn"}, {"env_step": 1500000, "rew": 1760.2, "rew_std": 333.3532060742779, "Agent": "qrdqn"}, {"env_step": 1600000, "rew": 1911.5, "rew_std": 621.3912213734596, "Agent": "qrdqn"}, {"env_step": 1700000, "rew": 1998.1, "rew_std": 404.5299618075279, "Agent": "qrdqn"}, {"env_step": 1800000, "rew": 2403.4, "rew_std": 561.4866338569423, "Agent": "qrdqn"}, {"env_step": 1900000, "rew": 2352.4, "rew_std": 371.63858787806197, "Agent": "qrdqn"}, {"env_step": 2000000, "rew": 2128.1, "rew_std": 730.9558741811984, "Agent": "qrdqn"}, {"env_step": 2100000, "rew": 2500.0, "rew_std": 809.2574374078993, "Agent": "qrdqn"}, {"env_step": 2200000, "rew": 2503.2, "rew_std": 550.9215552145332, "Agent": "qrdqn"}, {"env_step": 2300000, "rew": 2622.2, "rew_std": 507.82079516301815, "Agent": "qrdqn"}, {"env_step": 2400000, "rew": 2551.8, "rew_std": 607.7318158530126, "Agent": "qrdqn"}, {"env_step": 2500000, "rew": 2391.6, "rew_std": 668.416516851581, "Agent": "qrdqn"}, {"env_step": 2600000, "rew": 2284.1, "rew_std": 935.7713876797045, "Agent": "qrdqn"}, {"env_step": 2700000, "rew": 2470.0, "rew_std": 762.4539330346457, "Agent": "qrdqn"}, {"env_step": 2800000, "rew": 2389.0, "rew_std": 905.0358003968684, "Agent": "qrdqn"}, 
{"env_step": 2900000, "rew": 2890.4, "rew_std": 717.7201683107421, "Agent": "qrdqn"}, {"env_step": 3000000, "rew": 2774.2, "rew_std": 525.8086724275284, "Agent": "qrdqn"}, {"env_step": 3100000, "rew": 2885.2, "rew_std": 427.62993347051844, "Agent": "qrdqn"}, {"env_step": 3200000, "rew": 2853.4, "rew_std": 634.3021677402656, "Agent": "qrdqn"}, {"env_step": 3300000, "rew": 2818.2, "rew_std": 437.66238129407463, "Agent": "qrdqn"}, {"env_step": 3400000, "rew": 3153.4, "rew_std": 560.2157084552342, "Agent": "qrdqn"}, {"env_step": 3500000, "rew": 2667.6, "rew_std": 998.5805125276579, "Agent": "qrdqn"}, {"env_step": 3600000, "rew": 3060.6, "rew_std": 483.220901865803, "Agent": "qrdqn"}, {"env_step": 3700000, "rew": 2940.4, "rew_std": 498.1427907738905, "Agent": "qrdqn"}, {"env_step": 3800000, "rew": 3141.6, "rew_std": 600.0958590092087, "Agent": "qrdqn"}, {"env_step": 3900000, "rew": 3165.2, "rew_std": 600.9986356057724, "Agent": "qrdqn"}, {"env_step": 4000000, "rew": 2781.6, "rew_std": 783.5125014956686, "Agent": "qrdqn"}, {"env_step": 4100000, "rew": 3374.4, "rew_std": 895.0686230675277, "Agent": "qrdqn"}, {"env_step": 4200000, "rew": 2629.0, "rew_std": 847.0873626728237, "Agent": "qrdqn"}, {"env_step": 4300000, "rew": 3079.4, "rew_std": 804.6664153548352, "Agent": "qrdqn"}, {"env_step": 4400000, "rew": 3388.8, "rew_std": 672.935182614195, "Agent": "qrdqn"}, {"env_step": 4500000, "rew": 3347.8, "rew_std": 759.8049486545872, "Agent": "qrdqn"}, {"env_step": 4600000, "rew": 3110.0, "rew_std": 800.3929035167666, "Agent": "qrdqn"}, {"env_step": 4700000, "rew": 3388.4, "rew_std": 840.9579299822316, "Agent": "qrdqn"}, {"env_step": 4800000, "rew": 3641.4, "rew_std": 761.8887320337531, "Agent": "qrdqn"}, {"env_step": 4900000, "rew": 3562.0, "rew_std": 694.3111694334176, "Agent": "qrdqn"}, {"env_step": 5000000, "rew": 3529.8, "rew_std": 537.6299470825635, "Agent": "qrdqn"}, {"env_step": 5100000, "rew": 3322.8, "rew_std": 854.5160969812096, "Agent": "qrdqn"}, {"env_step": 5200000, 
"rew": 3274.0, "rew_std": 1038.5505283807813, "Agent": "qrdqn"}, {"env_step": 5300000, "rew": 3571.0, "rew_std": 705.0641105601675, "Agent": "qrdqn"}, {"env_step": 5400000, "rew": 3157.2, "rew_std": 1001.435449742019, "Agent": "qrdqn"}, {"env_step": 5500000, "rew": 3315.6, "rew_std": 1095.1945215348735, "Agent": "qrdqn"}, {"env_step": 5600000, "rew": 3545.9, "rew_std": 659.3630942053096, "Agent": "qrdqn"}, {"env_step": 5700000, "rew": 3607.2, "rew_std": 404.7084876797125, "Agent": "qrdqn"}, {"env_step": 5800000, "rew": 3753.6, "rew_std": 658.8455357669201, "Agent": "qrdqn"}, {"env_step": 5900000, "rew": 3261.0, "rew_std": 661.858746259351, "Agent": "qrdqn"}, {"env_step": 6000000, "rew": 3644.2, "rew_std": 767.9252307353887, "Agent": "qrdqn"}, {"env_step": 6100000, "rew": 3731.4, "rew_std": 678.6693156464347, "Agent": "qrdqn"}, {"env_step": 6200000, "rew": 4187.6, "rew_std": 725.6907330261287, "Agent": "qrdqn"}, {"env_step": 6300000, "rew": 3814.6, "rew_std": 838.6646767331983, "Agent": "qrdqn"}, {"env_step": 6400000, "rew": 3318.2, "rew_std": 769.7981293819828, "Agent": "qrdqn"}, {"env_step": 6500000, "rew": 3726.2, "rew_std": 650.0095076227732, "Agent": "qrdqn"}, {"env_step": 6600000, "rew": 3536.0, "rew_std": 672.9190144437888, "Agent": "qrdqn"}, {"env_step": 6700000, "rew": 3278.2, "rew_std": 785.3837024028446, "Agent": "qrdqn"}, {"env_step": 6800000, "rew": 3081.8, "rew_std": 919.7605992865751, "Agent": "qrdqn"}, {"env_step": 6900000, "rew": 3286.2, "rew_std": 544.497897149291, "Agent": "qrdqn"}, {"env_step": 7000000, "rew": 3537.8, "rew_std": 511.0807763944952, "Agent": "qrdqn"}, {"env_step": 7100000, "rew": 3516.8, "rew_std": 652.3656643325122, "Agent": "qrdqn"}, {"env_step": 7200000, "rew": 3074.8, "rew_std": 885.5579935837065, "Agent": "qrdqn"}, {"env_step": 7300000, "rew": 3015.6, "rew_std": 826.9422228910554, "Agent": "qrdqn"}, {"env_step": 7400000, "rew": 3113.0, "rew_std": 853.1176941079115, "Agent": "qrdqn"}, {"env_step": 7500000, "rew": 3382.6, 
"rew_std": 425.42407078114417, "Agent": "qrdqn"}, {"env_step": 7600000, "rew": 3832.2, "rew_std": 956.4151609003278, "Agent": "qrdqn"}, {"env_step": 7700000, "rew": 3565.6, "rew_std": 824.5613621799168, "Agent": "qrdqn"}, {"env_step": 7800000, "rew": 3260.4, "rew_std": 1026.301242326053, "Agent": "qrdqn"}, {"env_step": 7900000, "rew": 3165.4, "rew_std": 1141.938019333799, "Agent": "qrdqn"}, {"env_step": 8000000, "rew": 3833.2, "rew_std": 872.2041962751612, "Agent": "qrdqn"}, {"env_step": 8100000, "rew": 3275.8, "rew_std": 543.5265954854463, "Agent": "qrdqn"}, {"env_step": 8200000, "rew": 3510.4, "rew_std": 1006.9696321140971, "Agent": "qrdqn"}, {"env_step": 8300000, "rew": 3475.6, "rew_std": 1033.5646278777153, "Agent": "qrdqn"}, {"env_step": 8400000, "rew": 3522.6, "rew_std": 554.0274722430288, "Agent": "qrdqn"}, {"env_step": 8500000, "rew": 3770.6, "rew_std": 639.6787005989804, "Agent": "qrdqn"}, {"env_step": 8600000, "rew": 3319.4, "rew_std": 514.5678186595038, "Agent": "qrdqn"}, {"env_step": 8700000, "rew": 3270.6, "rew_std": 850.6858644646684, "Agent": "qrdqn"}, {"env_step": 8800000, "rew": 3856.0, "rew_std": 599.3169445293533, "Agent": "qrdqn"}, {"env_step": 8900000, "rew": 3440.7, "rew_std": 711.9547808674369, "Agent": "qrdqn"}, {"env_step": 9000000, "rew": 3568.7, "rew_std": 857.8783188774502, "Agent": "qrdqn"}, {"env_step": 9100000, "rew": 3740.8, "rew_std": 602.7020491088444, "Agent": "qrdqn"}, {"env_step": 9200000, "rew": 3701.8, "rew_std": 647.5816241988341, "Agent": "qrdqn"}, {"env_step": 9300000, "rew": 3148.5, "rew_std": 721.4049140392655, "Agent": "qrdqn"}, {"env_step": 9400000, "rew": 3532.6, "rew_std": 894.7174079003939, "Agent": "qrdqn"}, {"env_step": 9500000, "rew": 3562.2, "rew_std": 658.007568345532, "Agent": "qrdqn"}, {"env_step": 9600000, "rew": 3524.8, "rew_std": 867.9291215300937, "Agent": "qrdqn"}, {"env_step": 9700000, "rew": 3570.2, "rew_std": 838.2216651936408, "Agent": "qrdqn"}, {"env_step": 9800000, "rew": 3432.2, "rew_std": 
583.9133154844133, "Agent": "qrdqn"}, {"env_step": 9900000, "rew": 3285.6, "rew_std": 924.0051082109882, "Agent": "qrdqn"}, {"env_step": 10000000, "rew": 3202.8, "rew_std": 982.7004426578835, "Agent": "qrdqn"}, {"env_step": 0, "rew": 106.6, "rew_std": 87.09557968117556, "Agent": "iqn"}, {"env_step": 100000, "rew": 228.6, "rew_std": 33.87093148999596, "Agent": "iqn"}, {"env_step": 200000, "rew": 229.6, "rew_std": 75.1308192421725, "Agent": "iqn"}, {"env_step": 300000, "rew": 251.2, "rew_std": 79.04530346579739, "Agent": "iqn"}, {"env_step": 400000, "rew": 247.6, "rew_std": 64.6083585923679, "Agent": "iqn"}, {"env_step": 500000, "rew": 382.2, "rew_std": 127.84506247798544, "Agent": "iqn"}, {"env_step": 600000, "rew": 441.6, "rew_std": 243.14736272474764, "Agent": "iqn"}, {"env_step": 700000, "rew": 692.8, "rew_std": 305.80542833638515, "Agent": "iqn"}, {"env_step": 800000, "rew": 990.8, "rew_std": 394.5460175949062, "Agent": "iqn"}, {"env_step": 900000, "rew": 901.2, "rew_std": 464.5894531734443, "Agent": "iqn"}, {"env_step": 1000000, "rew": 1541.4, "rew_std": 385.48883252307064, "Agent": "iqn"}, {"env_step": 1100000, "rew": 2180.2, "rew_std": 539.3918427266026, "Agent": "iqn"}, {"env_step": 1200000, "rew": 2100.2, "rew_std": 646.6129908995024, "Agent": "iqn"}, {"env_step": 1300000, "rew": 2565.6, "rew_std": 635.4090336153555, "Agent": "iqn"}, {"env_step": 1400000, "rew": 2368.0, "rew_std": 440.5024404018666, "Agent": "iqn"}, {"env_step": 1500000, "rew": 2376.0, "rew_std": 1055.6806335251206, "Agent": "iqn"}, {"env_step": 1600000, "rew": 3192.5, "rew_std": 493.9407353114339, "Agent": "iqn"}, {"env_step": 1700000, "rew": 2622.4, "rew_std": 1216.6698155210393, "Agent": "iqn"}, {"env_step": 1800000, "rew": 3191.4, "rew_std": 527.0340026981181, "Agent": "iqn"}, {"env_step": 1900000, "rew": 2762.6, "rew_std": 706.9370834805597, "Agent": "iqn"}, {"env_step": 2000000, "rew": 3111.6, "rew_std": 1020.695762703069, "Agent": "iqn"}, {"env_step": 2100000, "rew": 3645.5, 
"rew_std": 998.2598108708975, "Agent": "iqn"}, {"env_step": 2200000, "rew": 3387.8, "rew_std": 938.3493805614196, "Agent": "iqn"}, {"env_step": 2300000, "rew": 3795.2, "rew_std": 690.2713669275294, "Agent": "iqn"}, {"env_step": 2400000, "rew": 3738.6, "rew_std": 1253.164011612207, "Agent": "iqn"}, {"env_step": 2500000, "rew": 3917.4, "rew_std": 391.9112654670697, "Agent": "iqn"}, {"env_step": 2600000, "rew": 3715.0, "rew_std": 903.7580428411135, "Agent": "iqn"}, {"env_step": 2700000, "rew": 4198.8, "rew_std": 916.1332654150268, "Agent": "iqn"}, {"env_step": 2800000, "rew": 3842.8, "rew_std": 1014.2438365600256, "Agent": "iqn"}, {"env_step": 2900000, "rew": 3685.0, "rew_std": 1446.485741374591, "Agent": "iqn"}, {"env_step": 3000000, "rew": 3950.0, "rew_std": 1456.4194450775506, "Agent": "iqn"}, {"env_step": 3100000, "rew": 4272.0, "rew_std": 806.635977377652, "Agent": "iqn"}, {"env_step": 3200000, "rew": 4197.4, "rew_std": 668.5554875999449, "Agent": "iqn"}, {"env_step": 3300000, "rew": 4473.6, "rew_std": 668.1130443270808, "Agent": "iqn"}, {"env_step": 3400000, "rew": 4128.8, "rew_std": 1420.9650804998691, "Agent": "iqn"}, {"env_step": 3500000, "rew": 4091.2, "rew_std": 799.4804312802158, "Agent": "iqn"}, {"env_step": 3600000, "rew": 3836.0, "rew_std": 495.31727205903087, "Agent": "iqn"}, {"env_step": 3700000, "rew": 3937.6, "rew_std": 955.3729324195865, "Agent": "iqn"}, {"env_step": 3800000, "rew": 4366.0, "rew_std": 646.7463181186267, "Agent": "iqn"}, {"env_step": 3900000, "rew": 4184.6, "rew_std": 648.5627494699337, "Agent": "iqn"}, {"env_step": 4000000, "rew": 4264.2, "rew_std": 1133.6307864556254, "Agent": "iqn"}, {"env_step": 4100000, "rew": 3667.2, "rew_std": 1318.5249940748186, "Agent": "iqn"}, {"env_step": 4200000, "rew": 4149.6, "rew_std": 734.6094472575206, "Agent": "iqn"}, {"env_step": 4300000, "rew": 4311.5, "rew_std": 1162.0347025799185, "Agent": "iqn"}, {"env_step": 4400000, "rew": 4001.8, "rew_std": 1118.5291949698942, "Agent": "iqn"}, {"env_step": 
4500000, "rew": 4658.6, "rew_std": 651.7754521305632, "Agent": "iqn"}, {"env_step": 4600000, "rew": 4676.1, "rew_std": 562.302134088072, "Agent": "iqn"}, {"env_step": 4700000, "rew": 4486.8, "rew_std": 643.8162470767571, "Agent": "iqn"}, {"env_step": 4800000, "rew": 4090.2, "rew_std": 1062.8171808923678, "Agent": "iqn"}, {"env_step": 4900000, "rew": 4424.2, "rew_std": 889.2979028424614, "Agent": "iqn"}, {"env_step": 5000000, "rew": 4119.8, "rew_std": 986.707433842474, "Agent": "iqn"}, {"env_step": 5100000, "rew": 4387.0, "rew_std": 1373.3178073556026, "Agent": "iqn"}, {"env_step": 5200000, "rew": 4230.2, "rew_std": 906.2165083466533, "Agent": "iqn"}, {"env_step": 5300000, "rew": 4634.0, "rew_std": 1000.1627867502369, "Agent": "iqn"}, {"env_step": 5400000, "rew": 4360.6, "rew_std": 547.0027787863604, "Agent": "iqn"}, {"env_step": 5500000, "rew": 4132.2, "rew_std": 1382.0892735275822, "Agent": "iqn"}, {"env_step": 5600000, "rew": 4627.2, "rew_std": 630.4678897453858, "Agent": "iqn"}, {"env_step": 5700000, "rew": 4543.6, "rew_std": 817.3174658601149, "Agent": "iqn"}, {"env_step": 5800000, "rew": 4541.4, "rew_std": 589.8366214469902, "Agent": "iqn"}, {"env_step": 5900000, "rew": 4541.8, "rew_std": 957.1254672194237, "Agent": "iqn"}, {"env_step": 6000000, "rew": 4616.6, "rew_std": 624.175648355493, "Agent": "iqn"}, {"env_step": 6100000, "rew": 4831.4, "rew_std": 685.3571623613486, "Agent": "iqn"}, {"env_step": 6200000, "rew": 4185.4, "rew_std": 1318.965063980089, "Agent": "iqn"}, {"env_step": 6300000, "rew": 4762.2, "rew_std": 578.2092700744256, "Agent": "iqn"}, {"env_step": 6400000, "rew": 4953.0, "rew_std": 491.9441025157228, "Agent": "iqn"}, {"env_step": 6500000, "rew": 4542.0, "rew_std": 540.8837213301949, "Agent": "iqn"}, {"env_step": 6600000, "rew": 4407.3, "rew_std": 992.3926692595023, "Agent": "iqn"}, {"env_step": 6700000, "rew": 4558.4, "rew_std": 883.3956305076453, "Agent": "iqn"}, {"env_step": 6800000, "rew": 4337.2, "rew_std": 886.7474048453709, "Agent": 
"iqn"}, {"env_step": 6900000, "rew": 4499.8, "rew_std": 1165.5764067619075, "Agent": "iqn"}, {"env_step": 7000000, "rew": 4851.0, "rew_std": 666.8494582737546, "Agent": "iqn"}, {"env_step": 7100000, "rew": 4711.8, "rew_std": 1179.0499395699912, "Agent": "iqn"}, {"env_step": 7200000, "rew": 5200.4, "rew_std": 528.0430285497575, "Agent": "iqn"}, {"env_step": 7300000, "rew": 4526.0, "rew_std": 615.5309902839987, "Agent": "iqn"}, {"env_step": 7400000, "rew": 4689.4, "rew_std": 1031.8333392559093, "Agent": "iqn"}, {"env_step": 7500000, "rew": 4679.8, "rew_std": 1083.7780030984204, "Agent": "iqn"}, {"env_step": 7600000, "rew": 4287.0, "rew_std": 1172.4614279369705, "Agent": "iqn"}, {"env_step": 7700000, "rew": 4314.4, "rew_std": 984.6696095645484, "Agent": "iqn"}, {"env_step": 7800000, "rew": 5033.0, "rew_std": 813.2641637254159, "Agent": "iqn"}, {"env_step": 7900000, "rew": 5103.8, "rew_std": 708.6434646562402, "Agent": "iqn"}, {"env_step": 8000000, "rew": 4809.2, "rew_std": 815.4278386221555, "Agent": "iqn"}, {"env_step": 8100000, "rew": 4326.3, "rew_std": 854.7598551640103, "Agent": "iqn"}, {"env_step": 8200000, "rew": 4424.6, "rew_std": 656.1347727410886, "Agent": "iqn"}, {"env_step": 8300000, "rew": 4463.8, "rew_std": 1188.5400960842676, "Agent": "iqn"}, {"env_step": 8400000, "rew": 4601.0, "rew_std": 1020.5477940792387, "Agent": "iqn"}, {"env_step": 8500000, "rew": 4801.4, "rew_std": 724.9215405821516, "Agent": "iqn"}, {"env_step": 8600000, "rew": 4811.2, "rew_std": 703.3446950109171, "Agent": "iqn"}, {"env_step": 8700000, "rew": 4873.2, "rew_std": 966.0274116193598, "Agent": "iqn"}, {"env_step": 8800000, "rew": 4744.0, "rew_std": 747.7903449497059, "Agent": "iqn"}, {"env_step": 8900000, "rew": 4795.2, "rew_std": 1258.5916573694583, "Agent": "iqn"}, {"env_step": 9000000, "rew": 4230.6, "rew_std": 1360.8685608830854, "Agent": "iqn"}, {"env_step": 9100000, "rew": 4927.6, "rew_std": 1000.1939012011621, "Agent": "iqn"}, {"env_step": 9200000, "rew": 4662.6, "rew_std": 
837.6820637927017, "Agent": "iqn"}, {"env_step": 9300000, "rew": 4471.6, "rew_std": 785.4536523563946, "Agent": "iqn"}, {"env_step": 9400000, "rew": 5254.6, "rew_std": 424.59020243053186, "Agent": "iqn"}, {"env_step": 9500000, "rew": 5147.4, "rew_std": 802.4213606329283, "Agent": "iqn"}, {"env_step": 9600000, "rew": 4296.0, "rew_std": 1377.9889694768967, "Agent": "iqn"}, {"env_step": 9700000, "rew": 4708.8, "rew_std": 957.8067445993477, "Agent": "iqn"}, {"env_step": 9800000, "rew": 5341.2, "rew_std": 670.1965084958291, "Agent": "iqn"}, {"env_step": 9900000, "rew": 4807.4, "rew_std": 688.9702751207776, "Agent": "iqn"}, {"env_step": 10000000, "rew": 5173.0, "rew_std": 639.4342812205176, "Agent": "iqn"}, {"env_step": 0, "rew": 55.2, "rew_std": 71.61675781547221, "Agent": "rainbow"}, {"env_step": 100000, "rew": 197.4, "rew_std": 98.0124481889928, "Agent": "rainbow"}, {"env_step": 200000, "rew": 183.8, "rew_std": 83.80190928612545, "Agent": "rainbow"}, {"env_step": 300000, "rew": 341.6, "rew_std": 115.49129837351384, "Agent": "rainbow"}, {"env_step": 400000, "rew": 478.6, "rew_std": 112.00374993722309, "Agent": "rainbow"}, {"env_step": 500000, "rew": 327.8, "rew_std": 113.28000706214668, "Agent": "rainbow"}, {"env_step": 600000, "rew": 556.2, "rew_std": 241.52424308959132, "Agent": "rainbow"}, {"env_step": 700000, "rew": 778.0, "rew_std": 244.70390270692457, "Agent": "rainbow"}, {"env_step": 800000, "rew": 952.2, "rew_std": 287.64902224759953, "Agent": "rainbow"}, {"env_step": 900000, "rew": 1213.4, "rew_std": 264.05310072029073, "Agent": "rainbow"}, {"env_step": 1000000, "rew": 1398.2, "rew_std": 236.1549491329792, "Agent": "rainbow"}, {"env_step": 1100000, "rew": 1322.4, "rew_std": 223.74056404684424, "Agent": "rainbow"}, {"env_step": 1200000, "rew": 1377.0, "rew_std": 333.29416436535456, "Agent": "rainbow"}, {"env_step": 1300000, "rew": 1495.2, "rew_std": 277.13996463880846, "Agent": "rainbow"}, {"env_step": 1400000, "rew": 1431.2, "rew_std": 408.9402890398548, 
"Agent": "rainbow"}, {"env_step": 1500000, "rew": 1460.6, "rew_std": 341.7953188678862, "Agent": "rainbow"}, {"env_step": 1600000, "rew": 1478.6, "rew_std": 316.41750899721086, "Agent": "rainbow"}, {"env_step": 1700000, "rew": 1522.4, "rew_std": 258.7659946747254, "Agent": "rainbow"}, {"env_step": 1800000, "rew": 1574.8, "rew_std": 316.74620755424996, "Agent": "rainbow"}, {"env_step": 1900000, "rew": 1628.8, "rew_std": 394.08445795286065, "Agent": "rainbow"}, {"env_step": 2000000, "rew": 1717.2, "rew_std": 368.0708627424887, "Agent": "rainbow"}, {"env_step": 2100000, "rew": 1660.2, "rew_std": 333.7057985711366, "Agent": "rainbow"}, {"env_step": 2200000, "rew": 1685.8, "rew_std": 311.28051657628686, "Agent": "rainbow"}, {"env_step": 2300000, "rew": 1637.6, "rew_std": 437.96237281300773, "Agent": "rainbow"}, {"env_step": 2400000, "rew": 1646.0, "rew_std": 412.8253868162664, "Agent": "rainbow"}, {"env_step": 2500000, "rew": 1597.1, "rew_std": 315.6765591550947, "Agent": "rainbow"}, {"env_step": 2600000, "rew": 1661.2, "rew_std": 427.9338266601508, "Agent": "rainbow"}, {"env_step": 2700000, "rew": 1649.2, "rew_std": 378.05629210476053, "Agent": "rainbow"}, {"env_step": 2800000, "rew": 1687.8, "rew_std": 342.1618915075143, "Agent": "rainbow"}, {"env_step": 2900000, "rew": 1625.6, "rew_std": 356.8526866929826, "Agent": "rainbow"}, {"env_step": 3000000, "rew": 1646.2, "rew_std": 357.99156414641953, "Agent": "rainbow"}, {"env_step": 3100000, "rew": 1651.4, "rew_std": 435.95141931183116, "Agent": "rainbow"}, {"env_step": 3200000, "rew": 1694.2, "rew_std": 385.55253857289017, "Agent": "rainbow"}, {"env_step": 3300000, "rew": 1634.8, "rew_std": 408.7338498338497, "Agent": "rainbow"}, {"env_step": 3400000, "rew": 1658.6, "rew_std": 404.9168309665578, "Agent": "rainbow"}, {"env_step": 3500000, "rew": 1594.8, "rew_std": 372.0668757091929, "Agent": "rainbow"}, {"env_step": 3600000, "rew": 1646.4, "rew_std": 462.23093795201555, "Agent": "rainbow"}, {"env_step": 3700000, "rew": 
1722.8, "rew_std": 375.3560443099325, "Agent": "rainbow"}, {"env_step": 3800000, "rew": 1726.6, "rew_std": 325.04775033831567, "Agent": "rainbow"}, {"env_step": 3900000, "rew": 1754.4, "rew_std": 369.6255402430952, "Agent": "rainbow"}, {"env_step": 4000000, "rew": 1707.2, "rew_std": 340.19782480198194, "Agent": "rainbow"}, {"env_step": 4100000, "rew": 1701.8, "rew_std": 354.3878666094538, "Agent": "rainbow"}, {"env_step": 4200000, "rew": 1657.6, "rew_std": 428.869024295297, "Agent": "rainbow"}, {"env_step": 4300000, "rew": 1686.4, "rew_std": 453.4417713444583, "Agent": "rainbow"}, {"env_step": 4400000, "rew": 1743.8, "rew_std": 245.49940936792498, "Agent": "rainbow"}, {"env_step": 4500000, "rew": 1714.6, "rew_std": 414.4838235685441, "Agent": "rainbow"}, {"env_step": 4600000, "rew": 1804.6, "rew_std": 416.3316466472372, "Agent": "rainbow"}, {"env_step": 4700000, "rew": 1770.2, "rew_std": 335.57884319485936, "Agent": "rainbow"}, {"env_step": 4800000, "rew": 1752.0, "rew_std": 379.45750750248703, "Agent": "rainbow"}, {"env_step": 4900000, "rew": 1681.8, "rew_std": 369.5407420028271, "Agent": "rainbow"}, {"env_step": 5000000, "rew": 1798.8, "rew_std": 338.7603282558334, "Agent": "rainbow"}, {"env_step": 5100000, "rew": 1814.6, "rew_std": 419.58651074599624, "Agent": "rainbow"}, {"env_step": 5200000, "rew": 1811.0, "rew_std": 331.25005660376877, "Agent": "rainbow"}, {"env_step": 5300000, "rew": 1854.4, "rew_std": 320.87106444801157, "Agent": "rainbow"}, {"env_step": 5400000, "rew": 1821.8, "rew_std": 351.93914246642134, "Agent": "rainbow"}, {"env_step": 5500000, "rew": 1869.2, "rew_std": 345.7851355972376, "Agent": "rainbow"}, {"env_step": 5600000, "rew": 1842.0, "rew_std": 353.2319351361086, "Agent": "rainbow"}, {"env_step": 5700000, "rew": 1894.3, "rew_std": 423.4864932911084, "Agent": "rainbow"}, {"env_step": 5800000, "rew": 1768.2, "rew_std": 373.75334112218985, "Agent": "rainbow"}, {"env_step": 5900000, "rew": 1788.2, "rew_std": 346.13690932924214, "Agent": 
"rainbow"}, {"env_step": 6000000, "rew": 1827.0, "rew_std": 370.17320270381543, "Agent": "rainbow"}, {"env_step": 6100000, "rew": 1777.0, "rew_std": 365.00767115226495, "Agent": "rainbow"}, {"env_step": 6200000, "rew": 1762.0, "rew_std": 395.1910930170365, "Agent": "rainbow"}, {"env_step": 6300000, "rew": 1882.0, "rew_std": 356.34477686644993, "Agent": "rainbow"}, {"env_step": 6400000, "rew": 1807.8, "rew_std": 391.83103501381817, "Agent": "rainbow"}, {"env_step": 6500000, "rew": 1864.4, "rew_std": 366.81635732338873, "Agent": "rainbow"}, {"env_step": 6600000, "rew": 1839.8, "rew_std": 329.80776218882414, "Agent": "rainbow"}, {"env_step": 6700000, "rew": 1803.2, "rew_std": 371.11852554136937, "Agent": "rainbow"}, {"env_step": 6800000, "rew": 1861.4, "rew_std": 354.3445216170274, "Agent": "rainbow"}, {"env_step": 6900000, "rew": 1861.8, "rew_std": 366.5399841763515, "Agent": "rainbow"}, {"env_step": 7000000, "rew": 1877.6, "rew_std": 345.57175810531743, "Agent": "rainbow"}, {"env_step": 7100000, "rew": 1860.6, "rew_std": 372.0484377067051, "Agent": "rainbow"}, {"env_step": 7200000, "rew": 1861.4, "rew_std": 343.88259624470675, "Agent": "rainbow"}, {"env_step": 7300000, "rew": 1931.6, "rew_std": 363.43890820879375, "Agent": "rainbow"}, {"env_step": 7400000, "rew": 1901.2, "rew_std": 362.95145680930943, "Agent": "rainbow"}, {"env_step": 7500000, "rew": 1897.0, "rew_std": 373.6364543242536, "Agent": "rainbow"}, {"env_step": 7600000, "rew": 1901.0, "rew_std": 359.4754511785193, "Agent": "rainbow"}, {"env_step": 7700000, "rew": 1892.6, "rew_std": 381.24957704894575, "Agent": "rainbow"}, {"env_step": 7800000, "rew": 1892.4, "rew_std": 351.7485465499467, "Agent": "rainbow"}, {"env_step": 7900000, "rew": 1915.4, "rew_std": 373.93480715226286, "Agent": "rainbow"}, {"env_step": 8000000, "rew": 1863.4, "rew_std": 332.4106496488944, "Agent": "rainbow"}, {"env_step": 8100000, "rew": 1909.0, "rew_std": 413.3543274238217, "Agent": "rainbow"}, {"env_step": 8200000, "rew": 1866.6, 
"rew_std": 333.5890285965652, "Agent": "rainbow"}, {"env_step": 8300000, "rew": 1887.8, "rew_std": 355.3504748836, "Agent": "rainbow"}, {"env_step": 8400000, "rew": 1912.4, "rew_std": 447.0863898621831, "Agent": "rainbow"}, {"env_step": 8500000, "rew": 1909.8, "rew_std": 365.22152181929255, "Agent": "rainbow"}, {"env_step": 8600000, "rew": 1915.4, "rew_std": 369.93734604659744, "Agent": "rainbow"}, {"env_step": 8700000, "rew": 1866.4, "rew_std": 375.5697538407479, "Agent": "rainbow"}, {"env_step": 8800000, "rew": 1877.2, "rew_std": 429.923900242822, "Agent": "rainbow"}, {"env_step": 8900000, "rew": 1829.0, "rew_std": 345.25845391532414, "Agent": "rainbow"}, {"env_step": 9000000, "rew": 1908.6, "rew_std": 339.2050117554279, "Agent": "rainbow"}, {"env_step": 9100000, "rew": 1874.0, "rew_std": 355.7178657306939, "Agent": "rainbow"}, {"env_step": 9200000, "rew": 1831.8, "rew_std": 345.1578769201132, "Agent": "rainbow"}, {"env_step": 9300000, "rew": 1870.8, "rew_std": 397.23262705875504, "Agent": "rainbow"}, {"env_step": 9400000, "rew": 1910.0, "rew_std": 358.90277234928124, "Agent": "rainbow"}, {"env_step": 9500000, "rew": 1902.4, "rew_std": 376.3017937772819, "Agent": "rainbow"}, {"env_step": 9600000, "rew": 1889.8, "rew_std": 407.2625197584477, "Agent": "rainbow"}, {"env_step": 9700000, "rew": 1912.6, "rew_std": 381.65329816470864, "Agent": "rainbow"}, {"env_step": 9800000, "rew": 1894.6, "rew_std": 346.8568004234601, "Agent": "rainbow"}, {"env_step": 9900000, "rew": 1934.6, "rew_std": 376.412592775534, "Agent": "rainbow"}, {"env_step": 10000000, "rew": 1896.0, "rew_std": 353.1526582088828, "Agent": "rainbow"}, {"env_step": 0, "rew": 149.2, "rew_std": 108.08959246847033, "Agent": "ppo"}, {"env_step": 100000, "rew": 451.8, "rew_std": 93.66087763842489, "Agent": "ppo"}, {"env_step": 200000, "rew": 548.8, "rew_std": 87.63195764103413, "Agent": "ppo"}, {"env_step": 300000, "rew": 628.6, "rew_std": 55.785661240143064, "Agent": "ppo"}, {"env_step": 400000, "rew": 712.4, 
"rew_std": 68.37426416423068, "Agent": "ppo"}, {"env_step": 500000, "rew": 747.4, "rew_std": 46.5536249931195, "Agent": "ppo"}, {"env_step": 600000, "rew": 758.2, "rew_std": 58.05824661492974, "Agent": "ppo"}, {"env_step": 700000, "rew": 748.8, "rew_std": 55.246357345982545, "Agent": "ppo"}, {"env_step": 800000, "rew": 781.4, "rew_std": 39.2127530275547, "Agent": "ppo"}, {"env_step": 900000, "rew": 792.4, "rew_std": 79.78370761001271, "Agent": "ppo"}, {"env_step": 1000000, "rew": 785.8, "rew_std": 37.83596172955037, "Agent": "ppo"}, {"env_step": 1100000, "rew": 824.4, "rew_std": 24.83223711227001, "Agent": "ppo"}, {"env_step": 1200000, "rew": 814.2, "rew_std": 35.104985400937004, "Agent": "ppo"}, {"env_step": 1300000, "rew": 823.0, "rew_std": 42.22084793085046, "Agent": "ppo"}, {"env_step": 1400000, "rew": 822.6, "rew_std": 22.628300864183327, "Agent": "ppo"}, {"env_step": 1500000, "rew": 824.2, "rew_std": 23.142169301947472, "Agent": "ppo"}, {"env_step": 1600000, "rew": 841.0, "rew_std": 110.87740978215535, "Agent": "ppo"}, {"env_step": 1700000, "rew": 851.8, "rew_std": 50.91522365658429, "Agent": "ppo"}, {"env_step": 1800000, "rew": 865.0, "rew_std": 90.81960140850653, "Agent": "ppo"}, {"env_step": 1900000, "rew": 872.2, "rew_std": 84.7417252597562, "Agent": "ppo"}, {"env_step": 2000000, "rew": 843.8, "rew_std": 130.17357642778353, "Agent": "ppo"}, {"env_step": 2100000, "rew": 868.0, "rew_std": 104.34557968596465, "Agent": "ppo"}, {"env_step": 2200000, "rew": 860.4, "rew_std": 135.80809990571254, "Agent": "ppo"}, {"env_step": 2300000, "rew": 884.0, "rew_std": 147.23043163694115, "Agent": "ppo"}, {"env_step": 2400000, "rew": 922.0, "rew_std": 163.5506037897751, "Agent": "ppo"}, {"env_step": 2500000, "rew": 906.8, "rew_std": 148.42425677765746, "Agent": "ppo"}, {"env_step": 2600000, "rew": 895.0, "rew_std": 125.72111994410486, "Agent": "ppo"}, {"env_step": 2700000, "rew": 920.4, "rew_std": 155.8660963776279, "Agent": "ppo"}, {"env_step": 2800000, "rew": 934.2, 
"rew_std": 182.06251673532364, "Agent": "ppo"}, {"env_step": 2900000, "rew": 894.0, "rew_std": 235.24625395529682, "Agent": "ppo"}, {"env_step": 3000000, "rew": 922.6, "rew_std": 170.39964788696014, "Agent": "ppo"}, {"env_step": 3100000, "rew": 933.2, "rew_std": 172.90968740935253, "Agent": "ppo"}, {"env_step": 3200000, "rew": 931.8, "rew_std": 181.70404508430735, "Agent": "ppo"}, {"env_step": 3300000, "rew": 928.4, "rew_std": 214.5931965370757, "Agent": "ppo"}, {"env_step": 3400000, "rew": 933.0, "rew_std": 203.49201458533943, "Agent": "ppo"}, {"env_step": 3500000, "rew": 958.2, "rew_std": 223.07209596899386, "Agent": "ppo"}, {"env_step": 3600000, "rew": 951.8, "rew_std": 234.61704967883298, "Agent": "ppo"}, {"env_step": 3700000, "rew": 944.0, "rew_std": 233.30666514268296, "Agent": "ppo"}, {"env_step": 3800000, "rew": 934.8, "rew_std": 249.3434579049549, "Agent": "ppo"}, {"env_step": 3900000, "rew": 925.6, "rew_std": 264.6655247666382, "Agent": "ppo"}, {"env_step": 4000000, "rew": 910.4, "rew_std": 329.20364518030476, "Agent": "ppo"}, {"env_step": 4100000, "rew": 939.6, "rew_std": 328.4677153085216, "Agent": "ppo"}, {"env_step": 4200000, "rew": 925.8, "rew_std": 297.59227140502156, "Agent": "ppo"}, {"env_step": 4300000, "rew": 947.8, "rew_std": 306.41599174977796, "Agent": "ppo"}, {"env_step": 4400000, "rew": 938.0, "rew_std": 307.99350642505436, "Agent": "ppo"}, {"env_step": 4500000, "rew": 885.0, "rew_std": 320.93145685644464, "Agent": "ppo"}, {"env_step": 4600000, "rew": 937.4, "rew_std": 259.07226790994054, "Agent": "ppo"}, {"env_step": 4700000, "rew": 932.6, "rew_std": 310.10198322487395, "Agent": "ppo"}, {"env_step": 4800000, "rew": 906.4, "rew_std": 359.8041689586156, "Agent": "ppo"}, {"env_step": 4900000, "rew": 887.0, "rew_std": 377.4125064170503, "Agent": "ppo"}, {"env_step": 5000000, "rew": 901.6, "rew_std": 373.16462854884844, "Agent": "ppo"}, {"env_step": 5100000, "rew": 899.6, "rew_std": 394.11145631661105, "Agent": "ppo"}, {"env_step": 5200000, 
"rew": 871.2, "rew_std": 410.2703498913856, "Agent": "ppo"}, {"env_step": 5300000, "rew": 859.8, "rew_std": 379.5180628112449, "Agent": "ppo"}, {"env_step": 5400000, "rew": 908.4, "rew_std": 370.1597492975161, "Agent": "ppo"}, {"env_step": 5500000, "rew": 858.8, "rew_std": 425.68175906420987, "Agent": "ppo"}, {"env_step": 5600000, "rew": 917.0, "rew_std": 371.99059127886557, "Agent": "ppo"}, {"env_step": 5700000, "rew": 926.0, "rew_std": 387.98144285519635, "Agent": "ppo"}, {"env_step": 5800000, "rew": 924.4, "rew_std": 380.3491027989944, "Agent": "ppo"}, {"env_step": 5900000, "rew": 942.8, "rew_std": 393.7331075741536, "Agent": "ppo"}, {"env_step": 6000000, "rew": 953.0, "rew_std": 385.58864091152896, "Agent": "ppo"}, {"env_step": 6100000, "rew": 931.0, "rew_std": 386.86871158055675, "Agent": "ppo"}, {"env_step": 6200000, "rew": 947.2, "rew_std": 389.32474876380513, "Agent": "ppo"}, {"env_step": 6300000, "rew": 954.4, "rew_std": 382.7979101301365, "Agent": "ppo"}, {"env_step": 6400000, "rew": 966.4, "rew_std": 395.44081731657394, "Agent": "ppo"}, {"env_step": 6500000, "rew": 952.8, "rew_std": 410.60073063744056, "Agent": "ppo"}, {"env_step": 6600000, "rew": 952.2, "rew_std": 401.42491203212586, "Agent": "ppo"}, {"env_step": 6700000, "rew": 974.0, "rew_std": 328.4058464765815, "Agent": "ppo"}, {"env_step": 6800000, "rew": 992.8, "rew_std": 326.9534523445195, "Agent": "ppo"}, {"env_step": 6900000, "rew": 986.8, "rew_std": 301.4514222888988, "Agent": "ppo"}, {"env_step": 7000000, "rew": 1018.6, "rew_std": 286.2880367741551, "Agent": "ppo"}, {"env_step": 7100000, "rew": 995.0, "rew_std": 302.6037012331475, "Agent": "ppo"}, {"env_step": 7200000, "rew": 986.4, "rew_std": 286.39804468606275, "Agent": "ppo"}, {"env_step": 7300000, "rew": 997.6, "rew_std": 314.9346598899524, "Agent": "ppo"}, {"env_step": 7400000, "rew": 1001.6, "rew_std": 297.1273127802289, "Agent": "ppo"}, {"env_step": 7500000, "rew": 986.2, "rew_std": 317.547413782572, "Agent": "ppo"}, {"env_step": 
7600000, "rew": 1022.6, "rew_std": 319.39574198789813, "Agent": "ppo"}, {"env_step": 7700000, "rew": 1019.6, "rew_std": 320.0022499920899, "Agent": "ppo"}, {"env_step": 7800000, "rew": 1003.2, "rew_std": 321.1394712582058, "Agent": "ppo"}, {"env_step": 7900000, "rew": 1024.8, "rew_std": 323.7668296783968, "Agent": "ppo"}, {"env_step": 8000000, "rew": 1018.2, "rew_std": 316.27260393527604, "Agent": "ppo"}, {"env_step": 8100000, "rew": 1014.0, "rew_std": 307.1468704056742, "Agent": "ppo"}, {"env_step": 8200000, "rew": 1004.0, "rew_std": 307.0244289954791, "Agent": "ppo"}, {"env_step": 8300000, "rew": 1017.0, "rew_std": 338.5513255032389, "Agent": "ppo"}, {"env_step": 8400000, "rew": 1018.2, "rew_std": 328.83363574914296, "Agent": "ppo"}, {"env_step": 8500000, "rew": 1000.2, "rew_std": 347.273321751038, "Agent": "ppo"}, {"env_step": 8600000, "rew": 1010.0, "rew_std": 339.7281265953704, "Agent": "ppo"}, {"env_step": 8700000, "rew": 1023.2, "rew_std": 338.9633608518773, "Agent": "ppo"}, {"env_step": 8800000, "rew": 1024.2, "rew_std": 358.4092074710135, "Agent": "ppo"}, {"env_step": 8900000, "rew": 953.8, "rew_std": 392.8872102779626, "Agent": "ppo"}, {"env_step": 9000000, "rew": 951.4, "rew_std": 427.01105372109515, "Agent": "ppo"}, {"env_step": 9100000, "rew": 979.0, "rew_std": 400.3560914985558, "Agent": "ppo"}, {"env_step": 9200000, "rew": 991.6, "rew_std": 416.51199262446204, "Agent": "ppo"}, {"env_step": 9300000, "rew": 989.2, "rew_std": 423.34780027773854, "Agent": "ppo"}, {"env_step": 9400000, "rew": 993.2, "rew_std": 427.4821165850099, "Agent": "ppo"}, {"env_step": 9500000, "rew": 978.2, "rew_std": 419.3723405280801, "Agent": "ppo"}, {"env_step": 9600000, "rew": 992.2, "rew_std": 368.224333796668, "Agent": "ppo"}, {"env_step": 9700000, "rew": 1026.4, "rew_std": 379.41723735223206, "Agent": "ppo"}, {"env_step": 9800000, "rew": 1025.2, "rew_std": 368.4651408206752, "Agent": "ppo"}, {"env_step": 9900000, "rew": 1035.2, "rew_std": 353.61696791867894, "Agent": 
"ppo"}, {"env_step": 10000000, "rew": 1025.8, "rew_std": 358.64461518333155, "Agent": "ppo"}]
examples/atari/benchmark/SpaceInvadersNoFrameskip-v4/result.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"env_step": 0, "rew": 131.5, "rew_std": 72.94964016360875, "Agent": "c51"}, {"env_step": 100000, "rew": 175.3, "rew_std": 78.69695038564073, "Agent": "c51"}, {"env_step": 200000, "rew": 162.45, "rew_std": 70.4969680199085, "Agent": "c51"}, {"env_step": 300000, "rew": 216.1, "rew_std": 43.270544253568154, "Agent": "c51"}, {"env_step": 400000, "rew": 310.5, "rew_std": 67.2647753285477, "Agent": "c51"}, {"env_step": 500000, "rew": 371.6, "rew_std": 66.80149698921424, "Agent": "c51"}, {"env_step": 600000, "rew": 429.15, "rew_std": 59.84816204362503, "Agent": "c51"}, {"env_step": 700000, "rew": 411.25, "rew_std": 67.41819116529307, "Agent": "c51"}, {"env_step": 800000, "rew": 433.9, "rew_std": 56.295115241022465, "Agent": "c51"}, {"env_step": 900000, "rew": 468.25, "rew_std": 55.36661900459518, "Agent": "c51"}, {"env_step": 1000000, "rew": 479.2, "rew_std": 77.91508198032008, "Agent": "c51"}, {"env_step": 1100000, "rew": 517.25, "rew_std": 102.26613564616588, "Agent": "c51"}, {"env_step": 1200000, "rew": 511.4, "rew_std": 65.065659145205, "Agent": "c51"}, {"env_step": 1300000, "rew": 573.65, "rew_std": 58.454277003483675, "Agent": "c51"}, {"env_step": 1400000, "rew": 556.35, "rew_std": 71.15758919468814, "Agent": "c51"}, {"env_step": 1500000, "rew": 553.5, "rew_std": 38.90629769073382, "Agent": "c51"}, {"env_step": 1600000, "rew": 608.25, "rew_std": 35.925791570959156, "Agent": "c51"}, {"env_step": 1700000, "rew": 571.35, "rew_std": 70.93273221863092, "Agent": "c51"}, {"env_step": 1800000, "rew": 624.95, "rew_std": 94.19406828457936, "Agent": "c51"}, {"env_step": 1900000, "rew": 640.2, "rew_std": 96.80113635696638, "Agent": "c51"}, {"env_step": 2000000, "rew": 617.15, "rew_std": 67.16139143883188, "Agent": "c51"}, {"env_step": 2100000, "rew": 617.65, "rew_std": 94.80454894149331, "Agent": "c51"}, {"env_step": 2200000, "rew": 613.0, "rew_std": 115.9631406956538, "Agent": "c51"}, {"env_step": 2300000, "rew": 659.9, "rew_std": 81.69969400187493, "Agent": "c51"}, 
{"env_step": 2400000, "rew": 698.0, "rew_std": 80.07558928912107, "Agent": "c51"}, {"env_step": 2500000, "rew": 651.8, "rew_std": 74.98106427625578, "Agent": "c51"}, {"env_step": 2600000, "rew": 619.8, "rew_std": 103.08035700365032, "Agent": "c51"}, {"env_step": 2700000, "rew": 629.75, "rew_std": 97.50211536166792, "Agent": "c51"}, {"env_step": 2800000, "rew": 655.95, "rew_std": 62.26975590123989, "Agent": "c51"}, {"env_step": 2900000, "rew": 722.85, "rew_std": 111.88946554524246, "Agent": "c51"}, {"env_step": 3000000, "rew": 689.7, "rew_std": 116.6568043450531, "Agent": "c51"}, {"env_step": 3100000, "rew": 738.45, "rew_std": 90.73572890543173, "Agent": "c51"}, {"env_step": 3200000, "rew": 742.85, "rew_std": 114.76150269145138, "Agent": "c51"}, {"env_step": 3300000, "rew": 722.95, "rew_std": 115.68069199308933, "Agent": "c51"}, {"env_step": 3400000, "rew": 775.55, "rew_std": 107.4728919309423, "Agent": "c51"}, {"env_step": 3500000, "rew": 796.25, "rew_std": 82.22902468106989, "Agent": "c51"}, {"env_step": 3600000, "rew": 742.3, "rew_std": 119.33360800713268, "Agent": "c51"}, {"env_step": 3700000, "rew": 758.55, "rew_std": 96.7464340428111, "Agent": "c51"}, {"env_step": 3800000, "rew": 664.4, "rew_std": 103.92925478420403, "Agent": "c51"}, {"env_step": 3900000, "rew": 738.85, "rew_std": 97.65578579889673, "Agent": "c51"}, {"env_step": 4000000, "rew": 689.45, "rew_std": 113.2767518072442, "Agent": "c51"}, {"env_step": 4100000, "rew": 832.35, "rew_std": 157.338973239309, "Agent": "c51"}, {"env_step": 4200000, "rew": 672.15, "rew_std": 78.5875467233836, "Agent": "c51"}, {"env_step": 4300000, "rew": 722.05, "rew_std": 77.35387837723458, "Agent": "c51"}, {"env_step": 4400000, "rew": 897.7, "rew_std": 116.28654264359226, "Agent": "c51"}, {"env_step": 4500000, "rew": 823.1, "rew_std": 92.00076086641893, "Agent": "c51"}, {"env_step": 4600000, "rew": 690.8, "rew_std": 81.19273366502695, "Agent": "c51"}, {"env_step": 4700000, "rew": 811.15, "rew_std": 99.55050225890375, 
"Agent": "c51"}, {"env_step": 4800000, "rew": 755.0, "rew_std": 107.04998832321282, "Agent": "c51"}, {"env_step": 4900000, "rew": 805.75, "rew_std": 115.61017472523774, "Agent": "c51"}, {"env_step": 5000000, "rew": 760.75, "rew_std": 112.35040053333142, "Agent": "c51"}, {"env_step": 5100000, "rew": 820.95, "rew_std": 141.30984572916356, "Agent": "c51"}, {"env_step": 5200000, "rew": 797.1, "rew_std": 154.61869227231227, "Agent": "c51"}, {"env_step": 5300000, "rew": 825.8, "rew_std": 151.04969380968635, "Agent": "c51"}, {"env_step": 5400000, "rew": 787.95, "rew_std": 107.38399554868501, "Agent": "c51"}, {"env_step": 5500000, "rew": 825.0, "rew_std": 65.9052349969257, "Agent": "c51"}, {"env_step": 5600000, "rew": 822.5, "rew_std": 149.53043168532616, "Agent": "c51"}, {"env_step": 5700000, "rew": 865.65, "rew_std": 141.1168044564502, "Agent": "c51"}, {"env_step": 5800000, "rew": 756.65, "rew_std": 111.32251569201982, "Agent": "c51"}, {"env_step": 5900000, "rew": 833.3, "rew_std": 179.73886613640354, "Agent": "c51"}, {"env_step": 6000000, "rew": 838.7, "rew_std": 96.8695514596821, "Agent": "c51"}, {"env_step": 6100000, "rew": 797.05, "rew_std": 143.24549731143384, "Agent": "c51"}, {"env_step": 6200000, "rew": 787.65, "rew_std": 143.62260441866385, "Agent": "c51"}, {"env_step": 6300000, "rew": 836.35, "rew_std": 147.87225060842215, "Agent": "c51"}, {"env_step": 6400000, "rew": 936.1, "rew_std": 174.2318570181699, "Agent": "c51"}, {"env_step": 6500000, "rew": 878.6, "rew_std": 130.01822949109868, "Agent": "c51"}, {"env_step": 6600000, "rew": 869.35, "rew_std": 131.44923925226803, "Agent": "c51"}, {"env_step": 6700000, "rew": 831.1, "rew_std": 128.2900619689616, "Agent": "c51"}, {"env_step": 6800000, "rew": 848.35, "rew_std": 173.9251347563083, "Agent": "c51"}, {"env_step": 6900000, "rew": 833.4, "rew_std": 157.15466903658958, "Agent": "c51"}, {"env_step": 7000000, "rew": 832.3, "rew_std": 154.00165583525393, "Agent": "c51"}, {"env_step": 7100000, "rew": 832.85, "rew_std": 
96.22423031648525, "Agent": "c51"}, {"env_step": 7200000, "rew": 867.75, "rew_std": 155.06728378352412, "Agent": "c51"}, {"env_step": 7300000, "rew": 881.55, "rew_std": 176.80051611915616, "Agent": "c51"}, {"env_step": 7400000, "rew": 848.7, "rew_std": 122.09365257866602, "Agent": "c51"}, {"env_step": 7500000, "rew": 891.75, "rew_std": 136.24688069823839, "Agent": "c51"}, {"env_step": 7600000, "rew": 947.85, "rew_std": 155.2700953178042, "Agent": "c51"}, {"env_step": 7700000, "rew": 810.6, "rew_std": 61.26001958863546, "Agent": "c51"}, {"env_step": 7800000, "rew": 809.45, "rew_std": 132.21695239264895, "Agent": "c51"}, {"env_step": 7900000, "rew": 933.9, "rew_std": 128.37421859547965, "Agent": "c51"}, {"env_step": 8000000, "rew": 859.35, "rew_std": 175.26181129955265, "Agent": "c51"}, {"env_step": 8100000, "rew": 922.05, "rew_std": 125.99334307811664, "Agent": "c51"}, {"env_step": 8200000, "rew": 878.3, "rew_std": 114.7343017584541, "Agent": "c51"}, {"env_step": 8300000, "rew": 895.25, "rew_std": 212.38682280216915, "Agent": "c51"}, {"env_step": 8400000, "rew": 877.2, "rew_std": 165.07804214976625, "Agent": "c51"}, {"env_step": 8500000, "rew": 872.55, "rew_std": 171.04918152391141, "Agent": "c51"}, {"env_step": 8600000, "rew": 921.95, "rew_std": 176.95118677194566, "Agent": "c51"}, {"env_step": 8700000, "rew": 881.1, "rew_std": 133.7792584820233, "Agent": "c51"}, {"env_step": 8800000, "rew": 875.65, "rew_std": 134.4557641010604, "Agent": "c51"}, {"env_step": 8900000, "rew": 865.25, "rew_std": 158.70353650753975, "Agent": "c51"}, {"env_step": 9000000, "rew": 873.9, "rew_std": 141.55260506257028, "Agent": "c51"}, {"env_step": 9100000, "rew": 923.35, "rew_std": 146.47082473994607, "Agent": "c51"}, {"env_step": 9200000, "rew": 894.5, "rew_std": 181.60740623664003, "Agent": "c51"}, {"env_step": 9300000, "rew": 873.55, "rew_std": 141.41206631684582, "Agent": "c51"}, {"env_step": 9400000, "rew": 919.55, "rew_std": 149.74168591277447, "Agent": "c51"}, {"env_step": 9500000, 
"rew": 886.55, "rew_std": 105.35142381572258, "Agent": "c51"}, {"env_step": 9600000, "rew": 860.15, "rew_std": 190.00829587152242, "Agent": "c51"}, {"env_step": 9700000, "rew": 919.65, "rew_std": 205.74645197426855, "Agent": "c51"}, {"env_step": 9800000, "rew": 877.75, "rew_std": 124.24014045388068, "Agent": "c51"}, {"env_step": 9900000, "rew": 880.1, "rew_std": 104.3469692899607, "Agent": "c51"}, {"env_step": 10000000, "rew": 880.55, "rew_std": 143.47516335589236, "Agent": "c51"}, {"env_step": 0, "rew": 189.2, "rew_std": 81.34807926435633, "Agent": "dqn"}, {"env_step": 100000, "rew": 245.5, "rew_std": 101.77032966439678, "Agent": "dqn"}, {"env_step": 200000, "rew": 230.55, "rew_std": 65.04092942140356, "Agent": "dqn"}, {"env_step": 300000, "rew": 289.9, "rew_std": 61.85903329344874, "Agent": "dqn"}, {"env_step": 400000, "rew": 272.75, "rew_std": 70.29838191594456, "Agent": "dqn"}, {"env_step": 500000, "rew": 295.65, "rew_std": 66.4168841485356, "Agent": "dqn"}, {"env_step": 600000, "rew": 313.0, "rew_std": 92.10130292237999, "Agent": "dqn"}, {"env_step": 700000, "rew": 321.55, "rew_std": 54.48323136525586, "Agent": "dqn"}, {"env_step": 800000, "rew": 398.9, "rew_std": 85.19647880047626, "Agent": "dqn"}, {"env_step": 900000, "rew": 368.95, "rew_std": 76.96442359947874, "Agent": "dqn"}, {"env_step": 1000000, "rew": 365.3, "rew_std": 76.99974025930217, "Agent": "dqn"}, {"env_step": 1100000, "rew": 436.35, "rew_std": 96.35638276730815, "Agent": "dqn"}, {"env_step": 1200000, "rew": 403.95, "rew_std": 62.60089855585142, "Agent": "dqn"}, {"env_step": 1300000, "rew": 449.65, "rew_std": 59.11558593129226, "Agent": "dqn"}, {"env_step": 1400000, "rew": 420.15, "rew_std": 76.2200924953519, "Agent": "dqn"}, {"env_step": 1500000, "rew": 467.6, "rew_std": 49.78092003970999, "Agent": "dqn"}, {"env_step": 1600000, "rew": 407.05, "rew_std": 62.21030863128715, "Agent": "dqn"}, {"env_step": 1700000, "rew": 471.3, "rew_std": 63.85264285838136, "Agent": "dqn"}, {"env_step": 1800000, 
"rew": 440.8, "rew_std": 76.10525606027484, "Agent": "dqn"}, {"env_step": 1900000, "rew": 511.2, "rew_std": 51.454931736423475, "Agent": "dqn"}, {"env_step": 2000000, "rew": 446.15, "rew_std": 89.50224857510564, "Agent": "dqn"}, {"env_step": 2100000, "rew": 512.9, "rew_std": 72.38224920517462, "Agent": "dqn"}, {"env_step": 2200000, "rew": 458.2, "rew_std": 57.65682960413277, "Agent": "dqn"}, {"env_step": 2300000, "rew": 443.95, "rew_std": 83.05884961880943, "Agent": "dqn"}, {"env_step": 2400000, "rew": 429.95, "rew_std": 97.29631287977978, "Agent": "dqn"}, {"env_step": 2500000, "rew": 518.2, "rew_std": 83.67950764673512, "Agent": "dqn"}, {"env_step": 2600000, "rew": 500.35, "rew_std": 86.74043174898313, "Agent": "dqn"}, {"env_step": 2700000, "rew": 472.85, "rew_std": 108.66325275823469, "Agent": "dqn"}, {"env_step": 2800000, "rew": 483.6, "rew_std": 62.72750592842027, "Agent": "dqn"}, {"env_step": 2900000, "rew": 442.75, "rew_std": 133.11221769619797, "Agent": "dqn"}, {"env_step": 3000000, "rew": 496.85, "rew_std": 91.74694817812743, "Agent": "dqn"}, {"env_step": 3100000, "rew": 488.3, "rew_std": 120.88097451625711, "Agent": "dqn"}, {"env_step": 3200000, "rew": 496.5, "rew_std": 77.99935897172489, "Agent": "dqn"}, {"env_step": 3300000, "rew": 489.05, "rew_std": 103.76065005578945, "Agent": "dqn"}, {"env_step": 3400000, "rew": 499.35, "rew_std": 79.70321511708295, "Agent": "dqn"}, {"env_step": 3500000, "rew": 518.35, "rew_std": 78.03109956933838, "Agent": "dqn"}, {"env_step": 3600000, "rew": 521.85, "rew_std": 87.429986274733, "Agent": "dqn"}, {"env_step": 3700000, "rew": 560.4, "rew_std": 98.244796299855, "Agent": "dqn"}, {"env_step": 3800000, "rew": 551.25, "rew_std": 61.10615762752556, "Agent": "dqn"}, {"env_step": 3900000, "rew": 520.0, "rew_std": 123.95079668965424, "Agent": "dqn"}, {"env_step": 4000000, "rew": 568.7, "rew_std": 94.99899999473679, "Agent": "dqn"}, {"env_step": 4100000, "rew": 540.9, "rew_std": 120.96668962983156, "Agent": "dqn"}, {"env_step": 
4200000, "rew": 542.15, "rew_std": 113.86111056897346, "Agent": "dqn"}, {"env_step": 4300000, "rew": 564.95, "rew_std": 118.0547436573389, "Agent": "dqn"}, {"env_step": 4400000, "rew": 560.8, "rew_std": 101.43401796241733, "Agent": "dqn"}, {"env_step": 4500000, "rew": 572.3, "rew_std": 100.23228022947497, "Agent": "dqn"}, {"env_step": 4600000, "rew": 577.25, "rew_std": 122.57757747646998, "Agent": "dqn"}, {"env_step": 4700000, "rew": 597.7, "rew_std": 134.22410364759378, "Agent": "dqn"}, {"env_step": 4800000, "rew": 561.1, "rew_std": 95.81330805269172, "Agent": "dqn"}, {"env_step": 4900000, "rew": 556.8, "rew_std": 126.28364898117255, "Agent": "dqn"}, {"env_step": 5000000, "rew": 604.65, "rew_std": 157.45920900347494, "Agent": "dqn"}, {"env_step": 5100000, "rew": 551.65, "rew_std": 108.79270425906326, "Agent": "dqn"}, {"env_step": 5200000, "rew": 511.25, "rew_std": 115.59849696254706, "Agent": "dqn"}, {"env_step": 5300000, "rew": 551.55, "rew_std": 108.7601604448982, "Agent": "dqn"}, {"env_step": 5400000, "rew": 569.1, "rew_std": 100.45765276971187, "Agent": "dqn"}, {"env_step": 5500000, "rew": 502.8, "rew_std": 87.80495430213492, "Agent": "dqn"}, {"env_step": 5600000, "rew": 569.9, "rew_std": 101.9560689709053, "Agent": "dqn"}, {"env_step": 5700000, "rew": 518.95, "rew_std": 81.64326365353115, "Agent": "dqn"}, {"env_step": 5800000, "rew": 488.6, "rew_std": 77.79293027004447, "Agent": "dqn"}, {"env_step": 5900000, "rew": 546.3, "rew_std": 118.97693053697428, "Agent": "dqn"}, {"env_step": 6000000, "rew": 554.15, "rew_std": 153.27508766919692, "Agent": "dqn"}, {"env_step": 6100000, "rew": 515.95, "rew_std": 145.70954841739095, "Agent": "dqn"}, {"env_step": 6200000, "rew": 475.1, "rew_std": 145.77187657432418, "Agent": "dqn"}, {"env_step": 6300000, "rew": 559.6, "rew_std": 122.7236326059492, "Agent": "dqn"}, {"env_step": 6400000, "rew": 511.9, "rew_std": 133.77664220632838, "Agent": "dqn"}, {"env_step": 6500000, "rew": 543.75, "rew_std": 91.17270699063398, "Agent": 
"dqn"}, {"env_step": 6600000, "rew": 521.35, "rew_std": 109.17922192432037, "Agent": "dqn"}, {"env_step": 6700000, "rew": 516.6, "rew_std": 75.09121120344244, "Agent": "dqn"}, {"env_step": 6800000, "rew": 583.9, "rew_std": 113.4287882329702, "Agent": "dqn"}, {"env_step": 6900000, "rew": 526.35, "rew_std": 107.32288898459639, "Agent": "dqn"}, {"env_step": 7000000, "rew": 540.0, "rew_std": 147.050841548085, "Agent": "dqn"}, {"env_step": 7100000, "rew": 521.75, "rew_std": 136.49674904553586, "Agent": "dqn"}, {"env_step": 7200000, "rew": 561.3, "rew_std": 84.21229126439917, "Agent": "dqn"}, {"env_step": 7300000, "rew": 491.65, "rew_std": 89.11146110349667, "Agent": "dqn"}, {"env_step": 7400000, "rew": 521.25, "rew_std": 41.02392594572099, "Agent": "dqn"}, {"env_step": 7500000, "rew": 501.35, "rew_std": 102.60459297711775, "Agent": "dqn"}, {"env_step": 7600000, "rew": 491.5, "rew_std": 119.28767748598344, "Agent": "dqn"}, {"env_step": 7700000, "rew": 503.5, "rew_std": 155.50707379408823, "Agent": "dqn"}, {"env_step": 7800000, "rew": 504.8, "rew_std": 110.91780740710664, "Agent": "dqn"}, {"env_step": 7900000, "rew": 551.55, "rew_std": 103.75245780221306, "Agent": "dqn"}, {"env_step": 8000000, "rew": 528.65, "rew_std": 35.64621298258764, "Agent": "dqn"}, {"env_step": 8100000, "rew": 521.05, "rew_std": 94.89137210516033, "Agent": "dqn"}, {"env_step": 8200000, "rew": 519.65, "rew_std": 79.50504700960813, "Agent": "dqn"}, {"env_step": 8300000, "rew": 544.2, "rew_std": 153.77226668030877, "Agent": "dqn"}, {"env_step": 8400000, "rew": 540.2, "rew_std": 130.77427116982912, "Agent": "dqn"}, {"env_step": 8500000, "rew": 524.4, "rew_std": 132.1672803684785, "Agent": "dqn"}, {"env_step": 8600000, "rew": 572.25, "rew_std": 186.15480788848834, "Agent": "dqn"}, {"env_step": 8700000, "rew": 564.35, "rew_std": 140.56280624688736, "Agent": "dqn"}, {"env_step": 8800000, "rew": 486.55, "rew_std": 130.3735881994509, "Agent": "dqn"}, {"env_step": 8900000, "rew": 576.0, "rew_std": 
83.01776918226604, "Agent": "dqn"}, {"env_step": 9000000, "rew": 553.25, "rew_std": 74.99208291546515, "Agent": "dqn"}, {"env_step": 9100000, "rew": 488.35, "rew_std": 62.5244152311719, "Agent": "dqn"}, {"env_step": 9200000, "rew": 557.05, "rew_std": 100.2916870931983, "Agent": "dqn"}, {"env_step": 9300000, "rew": 541.2, "rew_std": 124.49461835758201, "Agent": "dqn"}, {"env_step": 9400000, "rew": 445.8, "rew_std": 82.59031420209031, "Agent": "dqn"}, {"env_step": 9500000, "rew": 530.65, "rew_std": 123.81034892124325, "Agent": "dqn"}, {"env_step": 9600000, "rew": 553.6, "rew_std": 85.22347094550891, "Agent": "dqn"}, {"env_step": 9700000, "rew": 590.65, "rew_std": 110.74724601542019, "Agent": "dqn"}, {"env_step": 9800000, "rew": 561.0, "rew_std": 100.72661018817222, "Agent": "dqn"}, {"env_step": 9900000, "rew": 525.75, "rew_std": 88.93038007340348, "Agent": "dqn"}, {"env_step": 10000000, "rew": 522.1, "rew_std": 134.1502515838118, "Agent": "dqn"}, {"env_step": 0, "rew": 230.2, "rew_std": 115.47709729639034, "Agent": "fqf"}, {"env_step": 100000, "rew": 197.8, "rew_std": 50.49366296873302, "Agent": "fqf"}, {"env_step": 200000, "rew": 274.45, "rew_std": 80.67882311982494, "Agent": "fqf"}, {"env_step": 300000, "rew": 331.75, "rew_std": 106.72845215780092, "Agent": "fqf"}, {"env_step": 400000, "rew": 342.3, "rew_std": 107.6478518132155, "Agent": "fqf"}, {"env_step": 500000, "rew": 344.15, "rew_std": 89.13586539659555, "Agent": "fqf"}, {"env_step": 600000, "rew": 418.7, "rew_std": 101.0124249783164, "Agent": "fqf"}, {"env_step": 700000, "rew": 455.75, "rew_std": 68.63499471843791, "Agent": "fqf"}, {"env_step": 800000, "rew": 513.6, "rew_std": 56.154608003261856, "Agent": "fqf"}, {"env_step": 900000, "rew": 530.0, "rew_std": 71.92808908903392, "Agent": "fqf"}, {"env_step": 1000000, "rew": 524.25, "rew_std": 79.38332633494265, "Agent": "fqf"}, {"env_step": 1100000, "rew": 552.4, "rew_std": 55.314916613875496, "Agent": "fqf"}, {"env_step": 1200000, "rew": 592.0, "rew_std": 
123.01138158723363, "Agent": "fqf"}, {"env_step": 1300000, "rew": 626.2, "rew_std": 132.57247074713513, "Agent": "fqf"}, {"env_step": 1400000, "rew": 666.45, "rew_std": 91.45120283517325, "Agent": "fqf"}, {"env_step": 1500000, "rew": 633.95, "rew_std": 123.25632843793458, "Agent": "fqf"}, {"env_step": 1600000, "rew": 672.8, "rew_std": 103.08981520984506, "Agent": "fqf"}, {"env_step": 1700000, "rew": 617.5, "rew_std": 140.43023178788818, "Agent": "fqf"}, {"env_step": 1800000, "rew": 673.4, "rew_std": 67.20520813151315, "Agent": "fqf"}, {"env_step": 1900000, "rew": 668.5, "rew_std": 96.4898958440727, "Agent": "fqf"}, {"env_step": 2000000, "rew": 667.75, "rew_std": 174.6682927723289, "Agent": "fqf"}, {"env_step": 2100000, "rew": 699.8, "rew_std": 121.638028593035, "Agent": "fqf"}, {"env_step": 2200000, "rew": 714.25, "rew_std": 161.04211405716208, "Agent": "fqf"}, {"env_step": 2300000, "rew": 747.05, "rew_std": 150.87817105201134, "Agent": "fqf"}, {"env_step": 2400000, "rew": 735.6, "rew_std": 94.85483646077304, "Agent": "fqf"}, {"env_step": 2500000, "rew": 686.05, "rew_std": 107.51405722043978, "Agent": "fqf"}, {"env_step": 2600000, "rew": 727.95, "rew_std": 72.91243035312978, "Agent": "fqf"}, {"env_step": 2700000, "rew": 804.25, "rew_std": 120.79305650574456, "Agent": "fqf"}, {"env_step": 2800000, "rew": 799.6, "rew_std": 149.8772497746072, "Agent": "fqf"}, {"env_step": 2900000, "rew": 837.3, "rew_std": 153.77031573096286, "Agent": "fqf"}, {"env_step": 3000000, "rew": 825.05, "rew_std": 126.28864754996785, "Agent": "fqf"}, {"env_step": 3100000, "rew": 897.25, "rew_std": 165.72601636435965, "Agent": "fqf"}, {"env_step": 3200000, "rew": 835.4, "rew_std": 150.30997970860085, "Agent": "fqf"}, {"env_step": 3300000, "rew": 886.3, "rew_std": 185.1247147195641, "Agent": "fqf"}, {"env_step": 3400000, "rew": 787.0, "rew_std": 143.65931922433714, "Agent": "fqf"}, {"env_step": 3500000, "rew": 887.85, "rew_std": 202.29138019203884, "Agent": "fqf"}, {"env_step": 3600000, "rew": 
860.05, "rew_std": 139.24967683984045, "Agent": "fqf"}, {"env_step": 3700000, "rew": 864.55, "rew_std": 175.68529392069217, "Agent": "fqf"}, {"env_step": 3800000, "rew": 982.3, "rew_std": 242.51723650083102, "Agent": "fqf"}, {"env_step": 3900000, "rew": 976.7, "rew_std": 136.36333818149217, "Agent": "fqf"}, {"env_step": 4000000, "rew": 940.7, "rew_std": 151.9544668642551, "Agent": "fqf"}, {"env_step": 4100000, "rew": 923.85, "rew_std": 171.70586041250894, "Agent": "fqf"}, {"env_step": 4200000, "rew": 1001.85, "rew_std": 166.15249772422925, "Agent": "fqf"}, {"env_step": 4300000, "rew": 1156.5, "rew_std": 218.98983994697107, "Agent": "fqf"}, {"env_step": 4400000, "rew": 1059.3, "rew_std": 177.61998761400702, "Agent": "fqf"}, {"env_step": 4500000, "rew": 1082.8, "rew_std": 197.79777551833087, "Agent": "fqf"}, {"env_step": 4600000, "rew": 1097.7, "rew_std": 135.50944616520283, "Agent": "fqf"}, {"env_step": 4700000, "rew": 1051.95, "rew_std": 234.61686320467248, "Agent": "fqf"}, {"env_step": 4800000, "rew": 967.5, "rew_std": 162.95152653473363, "Agent": "fqf"}, {"env_step": 4900000, "rew": 987.2, "rew_std": 236.65536123232027, "Agent": "fqf"}, {"env_step": 5000000, "rew": 1005.5, "rew_std": 246.8736518950534, "Agent": "fqf"}, {"env_step": 5100000, "rew": 1098.95, "rew_std": 251.61572387273415, "Agent": "fqf"}, {"env_step": 5200000, "rew": 1028.55, "rew_std": 254.31441661848427, "Agent": "fqf"}, {"env_step": 5300000, "rew": 1025.2, "rew_std": 199.16051315459094, "Agent": "fqf"}, {"env_step": 5400000, "rew": 1034.65, "rew_std": 224.61445300781517, "Agent": "fqf"}, {"env_step": 5500000, "rew": 1263.2, "rew_std": 179.57897983895555, "Agent": "fqf"}, {"env_step": 5600000, "rew": 1016.95, "rew_std": 171.7589080659283, "Agent": "fqf"}, {"env_step": 5700000, "rew": 1224.7, "rew_std": 191.47979005628764, "Agent": "fqf"}, {"env_step": 5800000, "rew": 1192.7, "rew_std": 211.82247283987599, "Agent": "fqf"}, {"env_step": 5900000, "rew": 1256.45, "rew_std": 423.3461024977081, 
"Agent": "fqf"}, {"env_step": 6000000, "rew": 1206.3, "rew_std": 349.3533454827648, "Agent": "fqf"}, {"env_step": 6100000, "rew": 1323.8, "rew_std": 327.38587324440255, "Agent": "fqf"}, {"env_step": 6200000, "rew": 1459.2, "rew_std": 292.0249304425908, "Agent": "fqf"}, {"env_step": 6300000, "rew": 1187.2, "rew_std": 343.017506841852, "Agent": "fqf"}, {"env_step": 6400000, "rew": 1257.5, "rew_std": 311.28724676735476, "Agent": "fqf"}, {"env_step": 6500000, "rew": 1111.55, "rew_std": 231.75153181802273, "Agent": "fqf"}, {"env_step": 6600000, "rew": 1306.1, "rew_std": 182.76703750950279, "Agent": "fqf"}, {"env_step": 6700000, "rew": 1163.85, "rew_std": 369.6850044294467, "Agent": "fqf"}, {"env_step": 6800000, "rew": 1146.1, "rew_std": 217.4428430645626, "Agent": "fqf"}, {"env_step": 6900000, "rew": 1197.9, "rew_std": 226.09840777856002, "Agent": "fqf"}, {"env_step": 7000000, "rew": 1633.55, "rew_std": 109.7614344840664, "Agent": "fqf"}, {"env_step": 7100000, "rew": 1409.85, "rew_std": 353.492153378261, "Agent": "fqf"}, {"env_step": 7200000, "rew": 1372.45, "rew_std": 253.83640893299764, "Agent": "fqf"}, {"env_step": 7300000, "rew": 1275.55, "rew_std": 352.75008504605637, "Agent": "fqf"}, {"env_step": 7400000, "rew": 1356.95, "rew_std": 431.4569184750663, "Agent": "fqf"}, {"env_step": 7500000, "rew": 1394.0, "rew_std": 367.9727571437864, "Agent": "fqf"}, {"env_step": 7600000, "rew": 1537.6, "rew_std": 332.01941208308887, "Agent": "fqf"}, {"env_step": 7700000, "rew": 1574.95, "rew_std": 366.9757110491102, "Agent": "fqf"}, {"env_step": 7800000, "rew": 1337.1, "rew_std": 339.9577767900008, "Agent": "fqf"}, {"env_step": 7900000, "rew": 1460.65, "rew_std": 323.72148909208977, "Agent": "fqf"}, {"env_step": 8000000, "rew": 1490.3, "rew_std": 428.0282817758658, "Agent": "fqf"}, {"env_step": 8100000, "rew": 1340.1, "rew_std": 215.97648020097003, "Agent": "fqf"}, {"env_step": 8200000, "rew": 1639.3, "rew_std": 353.53735304773664, "Agent": "fqf"}, {"env_step": 8300000, "rew": 
1621.2, "rew_std": 447.873486600848, "Agent": "fqf"}, {"env_step": 8400000, "rew": 1636.0, "rew_std": 445.03949262958673, "Agent": "fqf"}, {"env_step": 8500000, "rew": 1507.95, "rew_std": 247.5409511575812, "Agent": "fqf"}, {"env_step": 8600000, "rew": 1481.65, "rew_std": 305.7656005831918, "Agent": "fqf"}, {"env_step": 8700000, "rew": 1612.35, "rew_std": 260.7944640900186, "Agent": "fqf"}, {"env_step": 8800000, "rew": 1461.6, "rew_std": 150.4408189289064, "Agent": "fqf"}, {"env_step": 8900000, "rew": 1593.3, "rew_std": 260.38521463401105, "Agent": "fqf"}, {"env_step": 9000000, "rew": 1542.55, "rew_std": 377.05214825007954, "Agent": "fqf"}, {"env_step": 9100000, "rew": 1562.2, "rew_std": 291.7097187273678, "Agent": "fqf"}, {"env_step": 9200000, "rew": 1645.4, "rew_std": 301.2403359445743, "Agent": "fqf"}, {"env_step": 9300000, "rew": 1787.55, "rew_std": 340.77921371468653, "Agent": "fqf"}, {"env_step": 9400000, "rew": 1669.3, "rew_std": 312.634627000913, "Agent": "fqf"}, {"env_step": 9500000, "rew": 1691.45, "rew_std": 373.35073657353354, "Agent": "fqf"}, {"env_step": 9600000, "rew": 1444.45, "rew_std": 174.3102765186264, "Agent": "fqf"}, {"env_step": 9700000, "rew": 1547.25, "rew_std": 277.2487375985687, "Agent": "fqf"}, {"env_step": 9800000, "rew": 1697.55, "rew_std": 281.5422215228117, "Agent": "fqf"}, {"env_step": 9900000, "rew": 1566.15, "rew_std": 436.8016168697181, "Agent": "fqf"}, {"env_step": 10000000, "rew": 1580.2, "rew_std": 413.62206179071256, "Agent": "fqf"}, {"env_step": 0, "rew": 104.2, "rew_std": 85.97418217116113, "Agent": "qrdqn"}, {"env_step": 100000, "rew": 222.7, "rew_std": 70.10249638921569, "Agent": "qrdqn"}, {"env_step": 200000, "rew": 284.55, "rew_std": 65.67931561762806, "Agent": "qrdqn"}, {"env_step": 300000, "rew": 298.65, "rew_std": 112.2911505863218, "Agent": "qrdqn"}, {"env_step": 400000, "rew": 401.8, "rew_std": 97.01886414507233, "Agent": "qrdqn"}, {"env_step": 500000, "rew": 307.5, "rew_std": 84.26268450506429, "Agent": "qrdqn"}, 
{"env_step": 600000, "rew": 300.85, "rew_std": 93.35230313173854, "Agent": "qrdqn"}, {"env_step": 700000, "rew": 326.1, "rew_std": 88.10839914559793, "Agent": "qrdqn"}, {"env_step": 800000, "rew": 373.1, "rew_std": 67.90500717914696, "Agent": "qrdqn"}, {"env_step": 900000, "rew": 435.5, "rew_std": 72.41926539257355, "Agent": "qrdqn"}, {"env_step": 1000000, "rew": 410.55, "rew_std": 76.28939965683306, "Agent": "qrdqn"}, {"env_step": 1100000, "rew": 413.0, "rew_std": 106.63043655542258, "Agent": "qrdqn"}, {"env_step": 1200000, "rew": 435.95, "rew_std": 79.69894917751425, "Agent": "qrdqn"}, {"env_step": 1300000, "rew": 429.0, "rew_std": 77.78110310351737, "Agent": "qrdqn"}, {"env_step": 1400000, "rew": 486.8, "rew_std": 72.63580384355913, "Agent": "qrdqn"}, {"env_step": 1500000, "rew": 430.25, "rew_std": 113.51965688813546, "Agent": "qrdqn"}, {"env_step": 1600000, "rew": 468.6, "rew_std": 107.82086996495623, "Agent": "qrdqn"}, {"env_step": 1700000, "rew": 475.6, "rew_std": 44.46223116308942, "Agent": "qrdqn"}, {"env_step": 1800000, "rew": 501.05, "rew_std": 96.80765723846436, "Agent": "qrdqn"}, {"env_step": 1900000, "rew": 462.0, "rew_std": 61.099099829702894, "Agent": "qrdqn"}, {"env_step": 2000000, "rew": 496.1, "rew_std": 102.8520296348108, "Agent": "qrdqn"}, {"env_step": 2100000, "rew": 519.55, "rew_std": 87.50984230359464, "Agent": "qrdqn"}, {"env_step": 2200000, "rew": 485.35, "rew_std": 75.20473721781096, "Agent": "qrdqn"}, {"env_step": 2300000, "rew": 512.45, "rew_std": 120.2733241413074, "Agent": "qrdqn"}, {"env_step": 2400000, "rew": 489.4, "rew_std": 75.57109235680004, "Agent": "qrdqn"}, {"env_step": 2500000, "rew": 511.45, "rew_std": 56.399224285445634, "Agent": "qrdqn"}, {"env_step": 2600000, "rew": 513.45, "rew_std": 94.62939553859572, "Agent": "qrdqn"}, {"env_step": 2700000, "rew": 497.9, "rew_std": 62.18231259771544, "Agent": "qrdqn"}, {"env_step": 2800000, "rew": 509.8, "rew_std": 98.18966340710207, "Agent": "qrdqn"}, {"env_step": 2900000, "rew": 
481.3, "rew_std": 49.9585828461937, "Agent": "qrdqn"}, {"env_step": 3000000, "rew": 519.35, "rew_std": 65.75106463016398, "Agent": "qrdqn"}, {"env_step": 3100000, "rew": 485.7, "rew_std": 51.05692901066417, "Agent": "qrdqn"}, {"env_step": 3200000, "rew": 518.6, "rew_std": 81.14117327226664, "Agent": "qrdqn"}, {"env_step": 3300000, "rew": 559.25, "rew_std": 62.88889011582252, "Agent": "qrdqn"}, {"env_step": 3400000, "rew": 512.15, "rew_std": 106.45211364740486, "Agent": "qrdqn"}, {"env_step": 3500000, "rew": 522.7, "rew_std": 51.48456079253275, "Agent": "qrdqn"}, {"env_step": 3600000, "rew": 565.4, "rew_std": 101.5477227711188, "Agent": "qrdqn"}, {"env_step": 3700000, "rew": 577.85, "rew_std": 100.99580436830037, "Agent": "qrdqn"}, {"env_step": 3800000, "rew": 509.1, "rew_std": 87.09069984791716, "Agent": "qrdqn"}, {"env_step": 3900000, "rew": 546.35, "rew_std": 75.80997625642684, "Agent": "qrdqn"}, {"env_step": 4000000, "rew": 516.45, "rew_std": 87.6491443198392, "Agent": "qrdqn"}, {"env_step": 4100000, "rew": 520.0, "rew_std": 117.6093108559012, "Agent": "qrdqn"}, {"env_step": 4200000, "rew": 546.85, "rew_std": 85.65396955191278, "Agent": "qrdqn"}, {"env_step": 4300000, "rew": 545.15, "rew_std": 87.88431316224755, "Agent": "qrdqn"}, {"env_step": 4400000, "rew": 489.25, "rew_std": 74.9827480157936, "Agent": "qrdqn"}, {"env_step": 4500000, "rew": 593.25, "rew_std": 62.53169196495486, "Agent": "qrdqn"}, {"env_step": 4600000, "rew": 527.6, "rew_std": 122.62091991173449, "Agent": "qrdqn"}, {"env_step": 4700000, "rew": 520.9, "rew_std": 74.23099083267041, "Agent": "qrdqn"}, {"env_step": 4800000, "rew": 598.05, "rew_std": 122.61779030793205, "Agent": "qrdqn"}, {"env_step": 4900000, "rew": 545.5, "rew_std": 72.01284607623838, "Agent": "qrdqn"}, {"env_step": 5000000, "rew": 603.55, "rew_std": 73.38203117930165, "Agent": "qrdqn"}, {"env_step": 5100000, "rew": 559.75, "rew_std": 47.47433517175359, "Agent": "qrdqn"}, {"env_step": 5200000, "rew": 558.3, "rew_std": 
102.89951409020357, "Agent": "qrdqn"}, {"env_step": 5300000, "rew": 614.8, "rew_std": 85.48280528854912, "Agent": "qrdqn"}, {"env_step": 5400000, "rew": 604.75, "rew_std": 84.51220326083092, "Agent": "qrdqn"}, {"env_step": 5500000, "rew": 611.15, "rew_std": 93.74754663456532, "Agent": "qrdqn"}, {"env_step": 5600000, "rew": 520.85, "rew_std": 106.81691111429876, "Agent": "qrdqn"}, {"env_step": 5700000, "rew": 660.4, "rew_std": 123.0060567614457, "Agent": "qrdqn"}, {"env_step": 5800000, "rew": 585.9, "rew_std": 127.98120955827851, "Agent": "qrdqn"}, {"env_step": 5900000, "rew": 570.45, "rew_std": 98.77789479433139, "Agent": "qrdqn"}, {"env_step": 6000000, "rew": 641.5, "rew_std": 90.51408730136983, "Agent": "qrdqn"}, {"env_step": 6100000, "rew": 592.95, "rew_std": 103.75438544948354, "Agent": "qrdqn"}, {"env_step": 6200000, "rew": 612.3, "rew_std": 82.66323245554823, "Agent": "qrdqn"}, {"env_step": 6300000, "rew": 642.25, "rew_std": 79.97319863554289, "Agent": "qrdqn"}, {"env_step": 6400000, "rew": 652.8, "rew_std": 100.57017450516828, "Agent": "qrdqn"}, {"env_step": 6500000, "rew": 617.95, "rew_std": 120.11233283888878, "Agent": "qrdqn"}, {"env_step": 6600000, "rew": 579.0, "rew_std": 103.84363244802255, "Agent": "qrdqn"}, {"env_step": 6700000, "rew": 566.85, "rew_std": 83.09273433941141, "Agent": "qrdqn"}, {"env_step": 6800000, "rew": 572.8, "rew_std": 120.3420541622919, "Agent": "qrdqn"}, {"env_step": 6900000, "rew": 600.65, "rew_std": 63.689893232757115, "Agent": "qrdqn"}, {"env_step": 7000000, "rew": 576.3, "rew_std": 126.27830375800905, "Agent": "qrdqn"}, {"env_step": 7100000, "rew": 573.25, "rew_std": 80.12154828758615, "Agent": "qrdqn"}, {"env_step": 7200000, "rew": 580.7, "rew_std": 85.19865022404991, "Agent": "qrdqn"}, {"env_step": 7300000, "rew": 549.95, "rew_std": 106.52029149415617, "Agent": "qrdqn"}, {"env_step": 7400000, "rew": 559.15, "rew_std": 76.05657433779146, "Agent": "qrdqn"}, {"env_step": 7500000, "rew": 558.75, "rew_std": 96.65175890794745, 
"Agent": "qrdqn"}, {"env_step": 7600000, "rew": 628.15, "rew_std": 106.18169569186584, "Agent": "qrdqn"}, {"env_step": 7700000, "rew": 630.65, "rew_std": 103.51402078945634, "Agent": "qrdqn"}, {"env_step": 7800000, "rew": 617.2, "rew_std": 153.99938311564756, "Agent": "qrdqn"}, {"env_step": 7900000, "rew": 596.9, "rew_std": 75.67886098508619, "Agent": "qrdqn"}, {"env_step": 8000000, "rew": 528.0, "rew_std": 121.1131702169504, "Agent": "qrdqn"}, {"env_step": 8100000, "rew": 606.8, "rew_std": 102.84313297444804, "Agent": "qrdqn"}, {"env_step": 8200000, "rew": 591.8, "rew_std": 142.44142655842785, "Agent": "qrdqn"}, {"env_step": 8300000, "rew": 550.85, "rew_std": 137.40925187191726, "Agent": "qrdqn"}, {"env_step": 8400000, "rew": 569.7, "rew_std": 142.38858802586674, "Agent": "qrdqn"}, {"env_step": 8500000, "rew": 603.85, "rew_std": 53.07732566736949, "Agent": "qrdqn"}, {"env_step": 8600000, "rew": 570.5, "rew_std": 92.73537620563148, "Agent": "qrdqn"}, {"env_step": 8700000, "rew": 667.8, "rew_std": 81.47244933104687, "Agent": "qrdqn"}, {"env_step": 8800000, "rew": 550.0, "rew_std": 169.62207993065053, "Agent": "qrdqn"}, {"env_step": 8900000, "rew": 636.75, "rew_std": 63.59490938746591, "Agent": "qrdqn"}, {"env_step": 9000000, "rew": 586.6, "rew_std": 137.99434771033194, "Agent": "qrdqn"}, {"env_step": 9100000, "rew": 609.55, "rew_std": 123.41463649016674, "Agent": "qrdqn"}, {"env_step": 9200000, "rew": 626.9, "rew_std": 111.03260782310753, "Agent": "qrdqn"}, {"env_step": 9300000, "rew": 624.4, "rew_std": 161.2231062844281, "Agent": "qrdqn"}, {"env_step": 9400000, "rew": 633.05, "rew_std": 107.944997568206, "Agent": "qrdqn"}, {"env_step": 9500000, "rew": 552.85, "rew_std": 160.7512130591866, "Agent": "qrdqn"}, {"env_step": 9600000, "rew": 555.75, "rew_std": 80.15804700714708, "Agent": "qrdqn"}, {"env_step": 9700000, "rew": 582.25, "rew_std": 108.67411145254421, "Agent": "qrdqn"}, {"env_step": 9800000, "rew": 635.15, "rew_std": 106.95303876000905, "Agent": "qrdqn"}, 
{"env_step": 9900000, "rew": 597.35, "rew_std": 98.45660211484042, "Agent": "qrdqn"}, {"env_step": 10000000, "rew": 550.15, "rew_std": 221.34204864869213, "Agent": "qrdqn"}, {"env_step": 0, "rew": 193.85, "rew_std": 59.73192195133185, "Agent": "iqn"}, {"env_step": 100000, "rew": 178.3, "rew_std": 102.48638934024362, "Agent": "iqn"}, {"env_step": 200000, "rew": 275.5, "rew_std": 63.457466069801434, "Agent": "iqn"}, {"env_step": 300000, "rew": 309.5, "rew_std": 43.517812445020716, "Agent": "iqn"}, {"env_step": 400000, "rew": 321.25, "rew_std": 78.36525058978629, "Agent": "iqn"}, {"env_step": 500000, "rew": 374.9, "rew_std": 70.88398972969847, "Agent": "iqn"}, {"env_step": 600000, "rew": 344.3, "rew_std": 138.2237678548809, "Agent": "iqn"}, {"env_step": 700000, "rew": 402.65, "rew_std": 115.49524882002723, "Agent": "iqn"}, {"env_step": 800000, "rew": 502.4, "rew_std": 117.13129385437523, "Agent": "iqn"}, {"env_step": 900000, "rew": 550.95, "rew_std": 73.10145347392212, "Agent": "iqn"}, {"env_step": 1000000, "rew": 542.7, "rew_std": 35.80237422294784, "Agent": "iqn"}, {"env_step": 1100000, "rew": 579.8, "rew_std": 73.14273716508016, "Agent": "iqn"}, {"env_step": 1200000, "rew": 617.6, "rew_std": 98.75621499429795, "Agent": "iqn"}, {"env_step": 1300000, "rew": 650.15, "rew_std": 124.93219160808795, "Agent": "iqn"}, {"env_step": 1400000, "rew": 666.45, "rew_std": 72.53014890374898, "Agent": "iqn"}, {"env_step": 1500000, "rew": 619.95, "rew_std": 84.52113640977622, "Agent": "iqn"}, {"env_step": 1600000, "rew": 633.65, "rew_std": 143.48554805275688, "Agent": "iqn"}, {"env_step": 1700000, "rew": 659.7, "rew_std": 71.16923492633597, "Agent": "iqn"}, {"env_step": 1800000, "rew": 746.45, "rew_std": 159.69368334408222, "Agent": "iqn"}, {"env_step": 1900000, "rew": 713.35, "rew_std": 60.149418118548745, "Agent": "iqn"}, {"env_step": 2000000, "rew": 708.95, "rew_std": 140.94013800191908, "Agent": "iqn"}, {"env_step": 2100000, "rew": 723.65, "rew_std": 82.80701962998064, "Agent": 
"iqn"}, {"env_step": 2200000, "rew": 680.25, "rew_std": 95.15467671113176, "Agent": "iqn"}, {"env_step": 2300000, "rew": 799.4, "rew_std": 105.6581279410155, "Agent": "iqn"}, {"env_step": 2400000, "rew": 761.5, "rew_std": 83.66719787347967, "Agent": "iqn"}, {"env_step": 2500000, "rew": 796.4, "rew_std": 124.04732967702287, "Agent": "iqn"}, {"env_step": 2600000, "rew": 689.55, "rew_std": 71.40919058496603, "Agent": "iqn"}, {"env_step": 2700000, "rew": 688.0, "rew_std": 80.3962063781619, "Agent": "iqn"}, {"env_step": 2800000, "rew": 757.45, "rew_std": 125.27738223637978, "Agent": "iqn"}, {"env_step": 2900000, "rew": 756.0, "rew_std": 107.79216112500946, "Agent": "iqn"}, {"env_step": 3000000, "rew": 744.8, "rew_std": 124.25441642050394, "Agent": "iqn"}, {"env_step": 3100000, "rew": 812.6, "rew_std": 155.1967783170772, "Agent": "iqn"}, {"env_step": 3200000, "rew": 757.55, "rew_std": 168.36262203945387, "Agent": "iqn"}, {"env_step": 3300000, "rew": 770.5, "rew_std": 111.00945905642456, "Agent": "iqn"}, {"env_step": 3400000, "rew": 759.45, "rew_std": 102.85267376203694, "Agent": "iqn"}, {"env_step": 3500000, "rew": 850.85, "rew_std": 200.12159428707338, "Agent": "iqn"}, {"env_step": 3600000, "rew": 785.85, "rew_std": 128.4011390136396, "Agent": "iqn"}, {"env_step": 3700000, "rew": 787.85, "rew_std": 137.1677895863311, "Agent": "iqn"}, {"env_step": 3800000, "rew": 791.75, "rew_std": 188.92634146672083, "Agent": "iqn"}, {"env_step": 3900000, "rew": 774.5, "rew_std": 60.62507731953833, "Agent": "iqn"}, {"env_step": 4000000, "rew": 872.55, "rew_std": 194.64755970728223, "Agent": "iqn"}, {"env_step": 4100000, "rew": 782.7, "rew_std": 128.8107526567561, "Agent": "iqn"}, {"env_step": 4200000, "rew": 826.2, "rew_std": 168.36124851045741, "Agent": "iqn"}, {"env_step": 4300000, "rew": 795.05, "rew_std": 154.91133754506157, "Agent": "iqn"}, {"env_step": 4400000, "rew": 824.45, "rew_std": 175.53866383221674, "Agent": "iqn"}, {"env_step": 4500000, "rew": 912.3, "rew_std": 
182.14118150489747, "Agent": "iqn"}, {"env_step": 4600000, "rew": 857.55, "rew_std": 158.4999132491876, "Agent": "iqn"}, {"env_step": 4700000, "rew": 815.05, "rew_std": 86.60411364363705, "Agent": "iqn"}, {"env_step": 4800000, "rew": 806.95, "rew_std": 147.00245746245196, "Agent": "iqn"}, {"env_step": 4900000, "rew": 912.15, "rew_std": 95.28878475455545, "Agent": "iqn"}, {"env_step": 5000000, "rew": 883.0, "rew_std": 149.37553347185073, "Agent": "iqn"}, {"env_step": 5100000, "rew": 886.6, "rew_std": 178.2464305392958, "Agent": "iqn"}, {"env_step": 5200000, "rew": 933.15, "rew_std": 92.64341584807849, "Agent": "iqn"}, {"env_step": 5300000, "rew": 874.25, "rew_std": 130.7188299366239, "Agent": "iqn"}, {"env_step": 5400000, "rew": 882.05, "rew_std": 148.7500336134416, "Agent": "iqn"}, {"env_step": 5500000, "rew": 801.65, "rew_std": 157.15773127657448, "Agent": "iqn"}, {"env_step": 5600000, "rew": 927.7, "rew_std": 267.2525210358174, "Agent": "iqn"}, {"env_step": 5700000, "rew": 952.6, "rew_std": 184.16715233721783, "Agent": "iqn"}, {"env_step": 5800000, "rew": 857.65, "rew_std": 235.34772677890902, "Agent": "iqn"}, {"env_step": 5900000, "rew": 836.4, "rew_std": 238.53121389034183, "Agent": "iqn"}, {"env_step": 6000000, "rew": 890.35, "rew_std": 114.51791344588845, "Agent": "iqn"}, {"env_step": 6100000, "rew": 935.55, "rew_std": 166.9722207434518, "Agent": "iqn"}, {"env_step": 6200000, "rew": 941.05, "rew_std": 148.40981268096797, "Agent": "iqn"}, {"env_step": 6300000, "rew": 965.9, "rew_std": 189.78208556130897, "Agent": "iqn"}, {"env_step": 6400000, "rew": 875.15, "rew_std": 215.37328641221964, "Agent": "iqn"}, {"env_step": 6500000, "rew": 939.0, "rew_std": 153.9548635152524, "Agent": "iqn"}, {"env_step": 6600000, "rew": 928.2, "rew_std": 232.79499565067974, "Agent": "iqn"}, {"env_step": 6700000, "rew": 847.65, "rew_std": 67.1215501906802, "Agent": "iqn"}, {"env_step": 6800000, "rew": 961.7, "rew_std": 153.62831770217363, "Agent": "iqn"}, {"env_step": 6900000, "rew": 
917.75, "rew_std": 210.0694230486674, "Agent": "iqn"}, {"env_step": 7000000, "rew": 887.35, "rew_std": 105.70029564764708, "Agent": "iqn"}, {"env_step": 7100000, "rew": 958.85, "rew_std": 133.01297869005117, "Agent": "iqn"}, {"env_step": 7200000, "rew": 886.9, "rew_std": 131.38260919923914, "Agent": "iqn"}, {"env_step": 7300000, "rew": 919.7, "rew_std": 193.17805258362037, "Agent": "iqn"}, {"env_step": 7400000, "rew": 843.9, "rew_std": 261.7283706440706, "Agent": "iqn"}, {"env_step": 7500000, "rew": 865.8, "rew_std": 213.59625464881168, "Agent": "iqn"}, {"env_step": 7600000, "rew": 951.8, "rew_std": 178.61973575168003, "Agent": "iqn"}, {"env_step": 7700000, "rew": 889.7, "rew_std": 187.84557487468265, "Agent": "iqn"}, {"env_step": 7800000, "rew": 977.9, "rew_std": 223.82055312236184, "Agent": "iqn"}, {"env_step": 7900000, "rew": 909.35, "rew_std": 181.14414840121114, "Agent": "iqn"}, {"env_step": 8000000, "rew": 985.15, "rew_std": 156.5410569147915, "Agent": "iqn"}, {"env_step": 8100000, "rew": 1051.1, "rew_std": 157.7045972697055, "Agent": "iqn"}, {"env_step": 8200000, "rew": 953.55, "rew_std": 201.69462684960152, "Agent": "iqn"}, {"env_step": 8300000, "rew": 951.6, "rew_std": 148.2414921673416, "Agent": "iqn"}, {"env_step": 8400000, "rew": 930.85, "rew_std": 137.40233804415413, "Agent": "iqn"}, {"env_step": 8500000, "rew": 1027.05, "rew_std": 82.30353880605621, "Agent": "iqn"}, {"env_step": 8600000, "rew": 999.15, "rew_std": 210.70109752917756, "Agent": "iqn"}, {"env_step": 8700000, "rew": 1005.65, "rew_std": 197.8955595762573, "Agent": "iqn"}, {"env_step": 8800000, "rew": 1114.7, "rew_std": 116.91389994350544, "Agent": "iqn"}, {"env_step": 8900000, "rew": 955.6, "rew_std": 172.78857022384324, "Agent": "iqn"}, {"env_step": 9000000, "rew": 858.45, "rew_std": 104.03327592650344, "Agent": "iqn"}, {"env_step": 9100000, "rew": 887.4, "rew_std": 217.57881330681073, "Agent": "iqn"}, {"env_step": 9200000, "rew": 965.85, "rew_std": 178.4683515360637, "Agent": "iqn"}, 
{"env_step": 9300000, "rew": 970.6, "rew_std": 139.28151348976647, "Agent": "iqn"}, {"env_step": 9400000, "rew": 964.0, "rew_std": 120.89272103811709, "Agent": "iqn"}, {"env_step": 9500000, "rew": 993.35, "rew_std": 285.66554307441424, "Agent": "iqn"}, {"env_step": 9600000, "rew": 965.5, "rew_std": 75.48609143411785, "Agent": "iqn"}, {"env_step": 9700000, "rew": 984.5, "rew_std": 142.40224717328024, "Agent": "iqn"}, {"env_step": 9800000, "rew": 959.0, "rew_std": 233.88319734431545, "Agent": "iqn"}, {"env_step": 9900000, "rew": 1060.1, "rew_std": 261.16113799721427, "Agent": "iqn"}, {"env_step": 10000000, "rew": 966.65, "rew_std": 156.9350263644162, "Agent": "iqn"}, {"env_step": 0, "rew": 129.2, "rew_std": 69.53567429744247, "Agent": "rainbow"}, {"env_step": 100000, "rew": 177.85, "rew_std": 75.14188246244566, "Agent": "rainbow"}, {"env_step": 200000, "rew": 198.55, "rew_std": 53.13823952672877, "Agent": "rainbow"}, {"env_step": 300000, "rew": 274.85, "rew_std": 47.645592660811765, "Agent": "rainbow"}, {"env_step": 400000, "rew": 297.05, "rew_std": 50.52496907470602, "Agent": "rainbow"}, {"env_step": 500000, "rew": 363.35, "rew_std": 90.23110605550616, "Agent": "rainbow"}, {"env_step": 600000, "rew": 377.1, "rew_std": 62.0112086642407, "Agent": "rainbow"}, {"env_step": 700000, "rew": 449.6, "rew_std": 20.95924616965028, "Agent": "rainbow"}, {"env_step": 800000, "rew": 478.55, "rew_std": 50.6070400240915, "Agent": "rainbow"}, {"env_step": 900000, "rew": 467.45, "rew_std": 44.51092562506424, "Agent": "rainbow"}, {"env_step": 1000000, "rew": 518.3, "rew_std": 61.56102663211522, "Agent": "rainbow"}, {"env_step": 1100000, "rew": 546.95, "rew_std": 26.70809802288437, "Agent": "rainbow"}, {"env_step": 1200000, "rew": 539.1, "rew_std": 56.16929766340327, "Agent": "rainbow"}, {"env_step": 1300000, "rew": 585.25, "rew_std": 74.84425495654293, "Agent": "rainbow"}, {"env_step": 1400000, "rew": 547.5, "rew_std": 76.41760268419836, "Agent": "rainbow"}, {"env_step": 1500000, 
"rew": 622.95, "rew_std": 78.23376828454577, "Agent": "rainbow"}, {"env_step": 1600000, "rew": 608.95, "rew_std": 82.29776728441665, "Agent": "rainbow"}, {"env_step": 1700000, "rew": 593.7, "rew_std": 66.87682707784514, "Agent": "rainbow"}, {"env_step": 1800000, "rew": 589.45, "rew_std": 61.96388060798, "Agent": "rainbow"}, {"env_step": 1900000, "rew": 616.65, "rew_std": 62.149839098745865, "Agent": "rainbow"}, {"env_step": 2000000, "rew": 625.85, "rew_std": 97.02269064502386, "Agent": "rainbow"}, {"env_step": 2100000, "rew": 625.05, "rew_std": 74.01602866947131, "Agent": "rainbow"}, {"env_step": 2200000, "rew": 604.05, "rew_std": 120.92465629473585, "Agent": "rainbow"}, {"env_step": 2300000, "rew": 645.65, "rew_std": 99.99626243015285, "Agent": "rainbow"}, {"env_step": 2400000, "rew": 700.1, "rew_std": 88.73691452828412, "Agent": "rainbow"}, {"env_step": 2500000, "rew": 651.45, "rew_std": 63.471430581010225, "Agent": "rainbow"}, {"env_step": 2600000, "rew": 664.25, "rew_std": 97.90409848417991, "Agent": "rainbow"}, {"env_step": 2700000, "rew": 724.25, "rew_std": 75.68726775356606, "Agent": "rainbow"}, {"env_step": 2800000, "rew": 670.4, "rew_std": 85.67461701110778, "Agent": "rainbow"}, {"env_step": 2900000, "rew": 741.0, "rew_std": 110.0034090380839, "Agent": "rainbow"}, {"env_step": 3000000, "rew": 760.65, "rew_std": 146.2280838279706, "Agent": "rainbow"}, {"env_step": 3100000, "rew": 758.85, "rew_std": 83.78127774150977, "Agent": "rainbow"}, {"env_step": 3200000, "rew": 781.65, "rew_std": 80.49038762485866, "Agent": "rainbow"}, {"env_step": 3300000, "rew": 759.45, "rew_std": 155.77362581643916, "Agent": "rainbow"}, {"env_step": 3400000, "rew": 764.45, "rew_std": 146.94462392343587, "Agent": "rainbow"}, {"env_step": 3500000, "rew": 801.25, "rew_std": 66.33259002933626, "Agent": "rainbow"}, {"env_step": 3600000, "rew": 816.85, "rew_std": 83.54371610121254, "Agent": "rainbow"}, {"env_step": 3700000, "rew": 808.7, "rew_std": 91.75380101118428, "Agent": "rainbow"}, 
{"env_step": 3800000, "rew": 858.65, "rew_std": 116.32176279613373, "Agent": "rainbow"}, {"env_step": 3900000, "rew": 785.0, "rew_std": 113.13421233207929, "Agent": "rainbow"}, {"env_step": 4000000, "rew": 784.1, "rew_std": 148.00249997888548, "Agent": "rainbow"}, {"env_step": 4100000, "rew": 847.8, "rew_std": 141.88837161656343, "Agent": "rainbow"}, {"env_step": 4200000, "rew": 864.45, "rew_std": 98.91623981935423, "Agent": "rainbow"}, {"env_step": 4300000, "rew": 874.8, "rew_std": 130.1499519784775, "Agent": "rainbow"}, {"env_step": 4400000, "rew": 896.05, "rew_std": 163.69735031453627, "Agent": "rainbow"}, {"env_step": 4500000, "rew": 803.35, "rew_std": 121.67580901724055, "Agent": "rainbow"}, {"env_step": 4600000, "rew": 883.9, "rew_std": 127.14004089978893, "Agent": "rainbow"}, {"env_step": 4700000, "rew": 884.0, "rew_std": 62.155450283945335, "Agent": "rainbow"}, {"env_step": 4800000, "rew": 880.95, "rew_std": 157.68995687741182, "Agent": "rainbow"}, {"env_step": 4900000, "rew": 889.8, "rew_std": 118.07925304641795, "Agent": "rainbow"}, {"env_step": 5000000, "rew": 926.05, "rew_std": 152.34573344862665, "Agent": "rainbow"}, {"env_step": 5100000, "rew": 909.15, "rew_std": 130.32882451706527, "Agent": "rainbow"}, {"env_step": 5200000, "rew": 860.3, "rew_std": 148.12835650205534, "Agent": "rainbow"}, {"env_step": 5300000, "rew": 921.85, "rew_std": 225.55631780112037, "Agent": "rainbow"}, {"env_step": 5400000, "rew": 906.55, "rew_std": 165.13622406970558, "Agent": "rainbow"}, {"env_step": 5500000, "rew": 830.05, "rew_std": 163.59407843806574, "Agent": "rainbow"}, {"env_step": 5600000, "rew": 936.0, "rew_std": 105.19054139988063, "Agent": "rainbow"}, {"env_step": 5700000, "rew": 953.05, "rew_std": 209.43226231887004, "Agent": "rainbow"}, {"env_step": 5800000, "rew": 1002.05, "rew_std": 93.83001918362801, "Agent": "rainbow"}, {"env_step": 5900000, "rew": 925.0, "rew_std": 173.92857729539446, "Agent": "rainbow"}, {"env_step": 6000000, "rew": 959.65, "rew_std": 
155.2884493450817, "Agent": "rainbow"}, {"env_step": 6100000, "rew": 968.05, "rew_std": 146.51168042173293, "Agent": "rainbow"}, {"env_step": 6200000, "rew": 1050.5, "rew_std": 181.52906103431485, "Agent": "rainbow"}, {"env_step": 6300000, "rew": 949.45, "rew_std": 170.18320275514853, "Agent": "rainbow"}, {"env_step": 6400000, "rew": 989.1, "rew_std": 128.6547317435313, "Agent": "rainbow"}, {"env_step": 6500000, "rew": 1003.1, "rew_std": 173.66603006921073, "Agent": "rainbow"}, {"env_step": 6600000, "rew": 1086.4, "rew_std": 179.66382496206631, "Agent": "rainbow"}, {"env_step": 6700000, "rew": 878.0, "rew_std": 83.01204731844649, "Agent": "rainbow"}, {"env_step": 6800000, "rew": 1107.55, "rew_std": 113.496354567008, "Agent": "rainbow"}, {"env_step": 6900000, "rew": 1062.7, "rew_std": 188.38619906988941, "Agent": "rainbow"}, {"env_step": 7000000, "rew": 1025.8, "rew_std": 146.94577231074055, "Agent": "rainbow"}, {"env_step": 7100000, "rew": 969.65, "rew_std": 143.79204602480627, "Agent": "rainbow"}, {"env_step": 7200000, "rew": 1074.2, "rew_std": 236.38752082121428, "Agent": "rainbow"}, {"env_step": 7300000, "rew": 1129.1, "rew_std": 145.2378738483871, "Agent": "rainbow"}, {"env_step": 7400000, "rew": 1020.55, "rew_std": 165.74822020160576, "Agent": "rainbow"}, {"env_step": 7500000, "rew": 1026.55, "rew_std": 126.17734543094492, "Agent": "rainbow"}, {"env_step": 7600000, "rew": 1062.0, "rew_std": 134.57005610461786, "Agent": "rainbow"}, {"env_step": 7700000, "rew": 1086.0, "rew_std": 98.97221832413376, "Agent": "rainbow"}, {"env_step": 7800000, "rew": 1066.7, "rew_std": 159.72964659073153, "Agent": "rainbow"}, {"env_step": 7900000, "rew": 1040.4, "rew_std": 127.58032763714004, "Agent": "rainbow"}, {"env_step": 8000000, "rew": 1074.7, "rew_std": 214.01894775930472, "Agent": "rainbow"}, {"env_step": 8100000, "rew": 1095.35, "rew_std": 139.572033373452, "Agent": "rainbow"}, {"env_step": 8200000, "rew": 1175.95, "rew_std": 163.02261346205933, "Agent": "rainbow"}, 
{"env_step": 8300000, "rew": 1147.0, "rew_std": 173.63841740813004, "Agent": "rainbow"}, {"env_step": 8400000, "rew": 1167.4, "rew_std": 160.64289589023224, "Agent": "rainbow"}, {"env_step": 8500000, "rew": 1162.35, "rew_std": 261.15216349860094, "Agent": "rainbow"}, {"env_step": 8600000, "rew": 1090.85, "rew_std": 134.61947295989538, "Agent": "rainbow"}, {"env_step": 8700000, "rew": 1165.2, "rew_std": 295.2810694914254, "Agent": "rainbow"}, {"env_step": 8800000, "rew": 1233.65, "rew_std": 176.00952389004408, "Agent": "rainbow"}, {"env_step": 8900000, "rew": 1189.25, "rew_std": 256.7154309736756, "Agent": "rainbow"}, {"env_step": 9000000, "rew": 1097.2, "rew_std": 220.08159850382765, "Agent": "rainbow"}, {"env_step": 9100000, "rew": 1151.05, "rew_std": 172.71746437462542, "Agent": "rainbow"}, {"env_step": 9200000, "rew": 1204.9, "rew_std": 126.59498410284667, "Agent": "rainbow"}, {"env_step": 9300000, "rew": 1064.65, "rew_std": 216.47644791062143, "Agent": "rainbow"}, {"env_step": 9400000, "rew": 1358.15, "rew_std": 267.57840439766437, "Agent": "rainbow"}, {"env_step": 9500000, "rew": 1092.1, "rew_std": 237.28230022485874, "Agent": "rainbow"}, {"env_step": 9600000, "rew": 1312.5, "rew_std": 333.92401830356556, "Agent": "rainbow"}, {"env_step": 9700000, "rew": 1284.1, "rew_std": 214.49811187980188, "Agent": "rainbow"}, {"env_step": 9800000, "rew": 1226.6, "rew_std": 304.27987117126236, "Agent": "rainbow"}, {"env_step": 9900000, "rew": 1122.35, "rew_std": 192.80275542636832, "Agent": "rainbow"}, {"env_step": 10000000, "rew": 1184.0, "rew_std": 231.1005192551501, "Agent": "rainbow"}, {"env_step": 0, "rew": 171.85, "rew_std": 31.587220517164848, "Agent": "ppo"}, {"env_step": 100000, "rew": 226.2, "rew_std": 53.99453676067608, "Agent": "ppo"}, {"env_step": 200000, "rew": 240.45, "rew_std": 20.0517455599257, "Agent": "ppo"}, {"env_step": 300000, "rew": 282.7, "rew_std": 25.0421644431946, "Agent": "ppo"}, {"env_step": 400000, "rew": 291.8, "rew_std": 47.00276587606308, 
"Agent": "ppo"}, {"env_step": 500000, "rew": 320.5, "rew_std": 31.345653606201928, "Agent": "ppo"}, {"env_step": 600000, "rew": 314.0, "rew_std": 56.18807702707043, "Agent": "ppo"}, {"env_step": 700000, "rew": 320.9, "rew_std": 56.30133213344068, "Agent": "ppo"}, {"env_step": 800000, "rew": 331.25, "rew_std": 52.53439349607074, "Agent": "ppo"}, {"env_step": 900000, "rew": 385.95, "rew_std": 86.00071220635328, "Agent": "ppo"}, {"env_step": 1000000, "rew": 396.05, "rew_std": 54.37713214210547, "Agent": "ppo"}, {"env_step": 1100000, "rew": 366.65, "rew_std": 46.54301773628349, "Agent": "ppo"}, {"env_step": 1200000, "rew": 377.25, "rew_std": 51.198266572219026, "Agent": "ppo"}, {"env_step": 1300000, "rew": 386.5, "rew_std": 81.74686538332831, "Agent": "ppo"}, {"env_step": 1400000, "rew": 400.45, "rew_std": 102.20431742348265, "Agent": "ppo"}, {"env_step": 1500000, "rew": 417.4, "rew_std": 66.27586287631418, "Agent": "ppo"}, {"env_step": 1600000, "rew": 428.3, "rew_std": 44.33801980242239, "Agent": "ppo"}, {"env_step": 1700000, "rew": 392.8, "rew_std": 61.047604375601836, "Agent": "ppo"}, {"env_step": 1800000, "rew": 443.6, "rew_std": 57.05953031702943, "Agent": "ppo"}, {"env_step": 1900000, "rew": 424.75, "rew_std": 63.06078416892704, "Agent": "ppo"}, {"env_step": 2000000, "rew": 438.4, "rew_std": 42.03617489734288, "Agent": "ppo"}, {"env_step": 2100000, "rew": 468.85, "rew_std": 68.96160163453283, "Agent": "ppo"}, {"env_step": 2200000, "rew": 474.1, "rew_std": 64.23659393211942, "Agent": "ppo"}, {"env_step": 2300000, "rew": 467.0, "rew_std": 42.47646407129482, "Agent": "ppo"}, {"env_step": 2400000, "rew": 488.1, "rew_std": 49.019791921222996, "Agent": "ppo"}, {"env_step": 2500000, "rew": 528.75, "rew_std": 90.16602741609503, "Agent": "ppo"}, {"env_step": 2600000, "rew": 522.45, "rew_std": 87.87617709026718, "Agent": "ppo"}, {"env_step": 2700000, "rew": 504.35, "rew_std": 51.12047045949401, "Agent": "ppo"}, {"env_step": 2800000, "rew": 528.55, "rew_std": 
70.213050781176, "Agent": "ppo"}, {"env_step": 2900000, "rew": 521.1, "rew_std": 44.84852282963175, "Agent": "ppo"}, {"env_step": 3000000, "rew": 565.5, "rew_std": 67.83251432757008, "Agent": "ppo"}, {"env_step": 3100000, "rew": 526.7, "rew_std": 55.79569875895453, "Agent": "ppo"}, {"env_step": 3200000, "rew": 610.9, "rew_std": 55.02990096302191, "Agent": "ppo"}, {"env_step": 3300000, "rew": 552.1, "rew_std": 85.30791288034189, "Agent": "ppo"}, {"env_step": 3400000, "rew": 594.4, "rew_std": 72.59814047205342, "Agent": "ppo"}, {"env_step": 3500000, "rew": 560.7, "rew_std": 89.29171294134747, "Agent": "ppo"}, {"env_step": 3600000, "rew": 580.25, "rew_std": 79.42205298278306, "Agent": "ppo"}, {"env_step": 3700000, "rew": 629.95, "rew_std": 90.99023299233825, "Agent": "ppo"}, {"env_step": 3800000, "rew": 593.6, "rew_std": 96.24598692932604, "Agent": "ppo"}, {"env_step": 3900000, "rew": 633.6, "rew_std": 63.77060451336494, "Agent": "ppo"}, {"env_step": 4000000, "rew": 623.85, "rew_std": 79.62538853908345, "Agent": "ppo"}, {"env_step": 4100000, "rew": 625.55, "rew_std": 71.11520582828963, "Agent": "ppo"}, {"env_step": 4200000, "rew": 631.1, "rew_std": 60.92405108001273, "Agent": "ppo"}, {"env_step": 4300000, "rew": 652.7, "rew_std": 78.92122654900898, "Agent": "ppo"}, {"env_step": 4400000, "rew": 645.25, "rew_std": 61.85194014741979, "Agent": "ppo"}, {"env_step": 4500000, "rew": 684.05, "rew_std": 84.49658277113933, "Agent": "ppo"}, {"env_step": 4600000, "rew": 696.05, "rew_std": 90.35857734603837, "Agent": "ppo"}, {"env_step": 4700000, "rew": 651.7, "rew_std": 98.24744271481065, "Agent": "ppo"}, {"env_step": 4800000, "rew": 710.2, "rew_std": 113.41190413708783, "Agent": "ppo"}, {"env_step": 4900000, "rew": 719.95, "rew_std": 103.00544888499832, "Agent": "ppo"}, {"env_step": 5000000, "rew": 702.85, "rew_std": 71.93714270111094, "Agent": "ppo"}, {"env_step": 5100000, "rew": 657.1, "rew_std": 91.01615241263497, "Agent": "ppo"}, {"env_step": 5200000, "rew": 669.75, 
"rew_std": 95.95891047734962, "Agent": "ppo"}, {"env_step": 5300000, "rew": 730.45, "rew_std": 102.41861403084891, "Agent": "ppo"}, {"env_step": 5400000, "rew": 707.9, "rew_std": 79.9180204960058, "Agent": "ppo"}, {"env_step": 5500000, "rew": 711.65, "rew_std": 116.25189245771442, "Agent": "ppo"}, {"env_step": 5600000, "rew": 742.6, "rew_std": 103.81541311385318, "Agent": "ppo"}, {"env_step": 5700000, "rew": 752.15, "rew_std": 98.74513912087015, "Agent": "ppo"}, {"env_step": 5800000, "rew": 791.7, "rew_std": 111.0621897857232, "Agent": "ppo"}, {"env_step": 5900000, "rew": 806.95, "rew_std": 144.94213500566354, "Agent": "ppo"}, {"env_step": 6000000, "rew": 827.45, "rew_std": 113.05871262313224, "Agent": "ppo"}, {"env_step": 6100000, "rew": 779.5, "rew_std": 100.94874937313487, "Agent": "ppo"}, {"env_step": 6200000, "rew": 812.75, "rew_std": 158.00810896912856, "Agent": "ppo"}, {"env_step": 6300000, "rew": 839.65, "rew_std": 123.6092735194249, "Agent": "ppo"}, {"env_step": 6400000, "rew": 852.95, "rew_std": 132.6488691998541, "Agent": "ppo"}, {"env_step": 6500000, "rew": 833.05, "rew_std": 148.62073374869334, "Agent": "ppo"}, {"env_step": 6600000, "rew": 887.55, "rew_std": 111.08947969992478, "Agent": "ppo"}, {"env_step": 6700000, "rew": 793.6, "rew_std": 104.03119724390372, "Agent": "ppo"}, {"env_step": 6800000, "rew": 832.25, "rew_std": 154.1725088983117, "Agent": "ppo"}, {"env_step": 6900000, "rew": 871.05, "rew_std": 154.96926308142525, "Agent": "ppo"}, {"env_step": 7000000, "rew": 833.1, "rew_std": 101.21086898154763, "Agent": "ppo"}, {"env_step": 7100000, "rew": 885.15, "rew_std": 144.50104670901175, "Agent": "ppo"}, {"env_step": 7200000, "rew": 850.1, "rew_std": 142.2687246024227, "Agent": "ppo"}, {"env_step": 7300000, "rew": 861.5, "rew_std": 87.94373201087159, "Agent": "ppo"}, {"env_step": 7400000, "rew": 834.6, "rew_std": 195.45917732355264, "Agent": "ppo"}, {"env_step": 7500000, "rew": 880.95, "rew_std": 143.8566039499056, "Agent": "ppo"}, {"env_step": 
7600000, "rew": 921.95, "rew_std": 171.26462711254766, "Agent": "ppo"}, {"env_step": 7700000, "rew": 906.05, "rew_std": 214.73034834415, "Agent": "ppo"}, {"env_step": 7800000, "rew": 934.75, "rew_std": 217.31075099957664, "Agent": "ppo"}, {"env_step": 7900000, "rew": 927.8, "rew_std": 146.93998775010158, "Agent": "ppo"}, {"env_step": 8000000, "rew": 904.5, "rew_std": 154.3149377085705, "Agent": "ppo"}, {"env_step": 8100000, "rew": 902.9, "rew_std": 179.20083705161647, "Agent": "ppo"}, {"env_step": 8200000, "rew": 941.1, "rew_std": 163.1423917931817, "Agent": "ppo"}, {"env_step": 8300000, "rew": 956.8, "rew_std": 210.935440360315, "Agent": "ppo"}, {"env_step": 8400000, "rew": 913.4, "rew_std": 155.79261856711955, "Agent": "ppo"}, {"env_step": 8500000, "rew": 907.55, "rew_std": 156.9779363477556, "Agent": "ppo"}, {"env_step": 8600000, "rew": 883.95, "rew_std": 164.77324570451358, "Agent": "ppo"}, {"env_step": 8700000, "rew": 963.85, "rew_std": 182.24695470706774, "Agent": "ppo"}, {"env_step": 8800000, "rew": 993.0, "rew_std": 205.16420253055844, "Agent": "ppo"}, {"env_step": 8900000, "rew": 961.75, "rew_std": 131.86114856165935, "Agent": "ppo"}, {"env_step": 9000000, "rew": 969.8, "rew_std": 228.0311601514144, "Agent": "ppo"}, {"env_step": 9100000, "rew": 1003.2, "rew_std": 189.2723170461016, "Agent": "ppo"}, {"env_step": 9200000, "rew": 953.9, "rew_std": 193.50074935255418, "Agent": "ppo"}, {"env_step": 9300000, "rew": 955.5, "rew_std": 164.37198666439485, "Agent": "ppo"}, {"env_step": 9400000, "rew": 989.6, "rew_std": 161.20899478627115, "Agent": "ppo"}, {"env_step": 9500000, "rew": 1055.5, "rew_std": 215.21524109597814, "Agent": "ppo"}, {"env_step": 9600000, "rew": 1071.5, "rew_std": 225.1992451141877, "Agent": "ppo"}, {"env_step": 9700000, "rew": 954.35, "rew_std": 160.38080464943425, "Agent": "ppo"}, {"env_step": 9800000, "rew": 965.05, "rew_std": 193.5224082632293, "Agent": "ppo"}, {"env_step": 9900000, "rew": 1018.95, "rew_std": 173.74959712183508, "Agent": 
"ppo"}, {"env_step": 10000000, "rew": 1129.5, "rew_std": 145.34132241038677, "Agent": "ppo"}]
examples/atari/results/c51/Breakout_rew.png ADDED
examples/atari/results/c51/Enduro_rew.png ADDED
examples/atari/results/c51/MsPacman_rew.png ADDED
examples/atari/results/c51/Pong_rew.png ADDED
examples/atari/results/c51/Qbert_rew.png ADDED
examples/atari/results/c51/Seaquest_rew.png ADDED
examples/atari/results/c51/SpaceInvader_rew.png ADDED
examples/atari/results/discrete_sac/Breakout_rew.png ADDED
examples/atari/results/discrete_sac/Enduro_rew.png ADDED
examples/atari/results/discrete_sac/MsPacman_rew.png ADDED
examples/atari/results/discrete_sac/Pong_rew.png ADDED
examples/atari/results/discrete_sac/Qbert_rew.png ADDED
examples/atari/results/discrete_sac/Seaquest_rew.png ADDED
examples/atari/results/discrete_sac/SpaceInvaders_rew.png ADDED
examples/atari/results/dqn/Breakout_rew.png ADDED
examples/atari/results/dqn/Enduro_rew.png ADDED
examples/atari/results/dqn/MsPacman_rew.png ADDED