Initial commit

- args.yml +1 -1
- dqn-FrozenLake-v1.zip +1 -1
- dqn-FrozenLake-v1/data +17 -17
- dqn-FrozenLake-v1/policy.optimizer.pth +1 -1
- dqn-FrozenLake-v1/policy.pth +1 -1
- replay.mp4 +0 -0
- results.json +1 -1
- train_eval_metrics.zip +2 -2
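These are the artifacts RL Baselines3 Zoo publishes for a trained agent: the launch arguments (args.yml), the zipped stable-baselines3 model plus its raw policy and optimizer weights, an evaluation video (replay.mp4), the evaluation summary (results.json), and the training/evaluation logs (train_eval_metrics.zip). As orientation, here is a minimal sketch of consuming the main artifact; the local paths and the gymnasium setup are assumptions, not part of the commit:

```python
# Minimal sketch (not part of the commit): load the published agent and roll
# out one greedy episode. Assumes the files above were downloaded locally and
# that stable-baselines3 and gymnasium are installed; env settings such as
# is_slippery follow gymnasium defaults here and may differ from training.
import gymnasium as gym
from stable_baselines3 import DQN

model = DQN.load("dqn-FrozenLake-v1.zip")  # the zip committed above

env = gym.make("FrozenLake-v1")
obs, _ = env.reset(seed=0)
done = False
while not done:
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, _ = env.step(int(action))
    done = terminated or truncated
env.close()
```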
args.yml
CHANGED
@@ -56,7 +56,7 @@
 - - save_replay_buffer
   - false
 - - seed
-  -
+  - 3106717846
 - - storage
   - null
 - - study_name
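The only substantive edit records the RNG seed that was previously null. Note that rl-zoo's args.yml stores the launch arguments as a YAML list of [name, value] pairs rather than a mapping, so a consumer has to fold it into a dict — a minimal sketch, assuming the pair layout shown in the hunk:

```python
# Sketch: recover the training arguments from args.yml.
# Assumes the rl-zoo style "list of [name, value] pairs" layout shown above.
import yaml

with open("args.yml") as f:
    pairs = yaml.safe_load(f)  # e.g. [['save_replay_buffer', False], ['seed', 3106717846], ...]

args = dict(pairs)
print(args["seed"])  # 3106717846 after this commit; None (null) before
```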
dqn-FrozenLake-v1.zip
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f9167e7d3d13287f52535a22a1c77b466b58a363549a225ffae5d1fa855d860c
 size 115453
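The model archive is tracked with Git LFS, so the repository stores only this three-line pointer file; the commit fills in the content hash of the uploaded blob. A downloaded copy can be checked against the pointer by hashing it — a small stdlib sketch:

```python
# Sketch: verify a downloaded file against its Git LFS pointer (oid sha256:<hash>).
import hashlib

def sha256_of(path: str) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            h.update(chunk)
    return h.hexdigest()

expected = "f9167e7d3d13287f52535a22a1c77b466b58a363549a225ffae5d1fa855d860c"
assert sha256_of("dqn-FrozenLake-v1.zip") == expected
```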
dqn-FrozenLake-v1/data
CHANGED
@@ -5,15 +5,15 @@
 "__module__": "stable_baselines3.dqn.policies",
 "__annotations__": "{'q_net': <class 'stable_baselines3.dqn.policies.QNetwork'>, 'q_net_target': <class 'stable_baselines3.dqn.policies.QNetwork'>}",
 "__doc__": "\n Policy class with Q-Value Net and target net for DQN\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
-"__init__": "<function DQNPolicy.__init__ at
-"_build": "<function DQNPolicy._build at
-"make_q_net": "<function DQNPolicy.make_q_net at
-"forward": "<function DQNPolicy.forward at
-"_predict": "<function DQNPolicy._predict at
-"_get_constructor_parameters": "<function DQNPolicy._get_constructor_parameters at
-"set_training_mode": "<function DQNPolicy.set_training_mode at
+"__init__": "<function DQNPolicy.__init__ at 0x7f482b60f910>",
+"_build": "<function DQNPolicy._build at 0x7f482b60f9a0>",
+"make_q_net": "<function DQNPolicy.make_q_net at 0x7f482b60fa30>",
+"forward": "<function DQNPolicy.forward at 0x7f482b60fac0>",
+"_predict": "<function DQNPolicy._predict at 0x7f482b60fb50>",
+"_get_constructor_parameters": "<function DQNPolicy._get_constructor_parameters at 0x7f482b60fbe0>",
+"set_training_mode": "<function DQNPolicy.set_training_mode at 0x7f482b60fc70>",
 "__abstractmethods__": "frozenset()",
-"_abc_impl": "<_abc._abc_data object at
+"_abc_impl": "<_abc._abc_data object at 0x7f482b624c80>"
 },
 "verbose": 1,
 "policy_kwargs": {},
@@ -22,7 +22,7 @@
 "_num_timesteps_at_start": 0,
 "seed": 0,
 "action_noise": null,
-"start_time":
+"start_time": 1727970156559401634,
 "learning_rate": {
 ":type:": "<class 'function'>",
 ":serialized:": "gAWVxQIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMSS91c3IvbG9jYWwvbGliL3B5dGhvbjMuMTAvZGlzdC1wYWNrYWdlcy9zdGFibGVfYmFzZWxpbmVzMy9jb21tb24vdXRpbHMucHmUjARmdW5jlEuFQwIEAZSMA3ZhbJSFlCl0lFKUfZQojAtfX3BhY2thZ2VfX5SMGHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbpSMCF9fbmFtZV9flIwec3RhYmxlX2Jhc2VsaW5lczMuY29tbW9uLnV0aWxzlIwIX19maWxlX1+UjEkvdXNyL2xvY2FsL2xpYi9weXRob24zLjEwL2Rpc3QtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lHVOTmgAjBBfbWFrZV9lbXB0eV9jZWxslJOUKVKUhZR0lFKUjBxjbG91ZHBpY2tsZS5jbG91ZHBpY2tsZV9mYXN0lIwSX2Z1bmN0aW9uX3NldHN0YXRllJOUaB99lH2UKGgWaA2MDF9fcXVhbG5hbWVfX5SMGWNvbnN0YW50X2ZuLjxsb2NhbHM+LmZ1bmOUjA9fX2Fubm90YXRpb25zX1+UfZSMDl9fa3dkZWZhdWx0c19flE6MDF9fZGVmYXVsdHNfX5ROjApfX21vZHVsZV9flGgXjAdfX2RvY19flE6MC19fY2xvc3VyZV9flGgAjApfbWFrZV9jZWxslJOURz8aNuLrHEMthZRSlIWUjBdfY2xvdWRwaWNrbGVfc3VibW9kdWxlc5RdlIwLX19nbG9iYWxzX1+UfZR1hpSGUjAu"
@@ -37,14 +37,14 @@
 ":type:": "<class 'numpy.ndarray'>",
 ":serialized:": "gAWVewAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYIAAAAAAAAAAQAAAAAAAAAlIwFbnVtcHmUjAVkdHlwZZSTlIwCaTiUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksBhZSMAUOUdJRSlC4="
 },
-"_episode_num":
+"_episode_num": 3799,
 "use_sde": false,
 "sde_sample_freq": -1,
 "_current_progress_remaining": 0.0,
 "_stats_window_size": 100,
 "ep_info_buffer": {
 ":type:": "<class 'collections.deque'>",
-":serialized:": "
+":serialized:": "gAWV4AsAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHAAAAAAAAAACMAWyUSxaMAXSUR0BhI65Etuk2dX2UKGgGRwAAAAAAAAAAaAdLF2gIR0BhJR4jbBXTdX2UKGgGRwAAAAAAAAAAaAdLDWgIR0BhJd1hb4ahdX2UKGgGRwAAAAAAAAAAaAdLCWgIR0BhJnttygf2dX2UKGgGRwAAAAAAAAAAaAdLJGgIR0BhKH2Xb/OudX2UKGgGRz/wAAAAAAAAaAdLJGgIR0BhKp1FH8TBdX2UKGgGRwAAAAAAAAAAaAdLNmgIR0BhLcmY0EX+dX2UKGgGRz/wAAAAAAAAaAdLIWgIR0BhL6xTsIE9dX2UKGgGRz/wAAAAAAAAaAdLIWgIR0BhMYF/x2B8dX2UKGgGRwAAAAAAAAAAaAdLIGgIR0BhMz4cm0E6dX2UKGgGRz/wAAAAAAAAaAdLNmgIR0BhNg8fV7QcdX2UKGgGRz/wAAAAAAAAaAdLIGgIR0BhN8PFvQ4TdX2UKGgGRwAAAAAAAAAAaAdLY2gIR0BhPgIdELH/dX2UKGgGRz/wAAAAAAAAaAdLO2gIR0BhQVwNsnAqdX2UKGgGRwAAAAAAAAAAaAdLEWgIR0BhQoYixFAndX2UKGgGRz/wAAAAAAAAaAdLDmgIR0BhQz9OymhudX2UKGgGRwAAAAAAAAAAaAdLDmgIR0BhRBvo/zJ7dX2UKGgGRwAAAAAAAAAAaAdLB2gIR0BhRIbsF+uvdX2UKGgGRz/wAAAAAAAAaAdLIGgIR0BhRolMRHwxdX2UKGgGRz/wAAAAAAAAaAdLGWgIR0BhR/MjeKsNdX2UKGgGRwAAAAAAAAAAaAdLDmgIR0BhSKfYjB2wdX2UKGgGRz/wAAAAAAAAaAdLMWgIR0BhS3QdCE6DdX2UKGgGRwAAAAAAAAAAaAdLH2gIR0BhTUqDsdDIdX2UKGgGRz/wAAAAAAAAaAdLC2gIR0BhTeiSJTESdX2UKGgGRz/wAAAAAAAAaAdLL2gIR0BhUMRFqi48dX2UKGgGRz/wAAAAAAAAaAdLEGgIR0BhUacmShaldX2UKGgGRz/wAAAAAAAAaAdLH2gIR0BhU03qAz55dX2UKGgGRz/wAAAAAAAAaAdLH2gIR0BhVOzD4xk/dX2UKGgGRz/wAAAAAAAAaAdLHWgIR0BhVrEvTPSldX2UKGgGRwAAAAAAAAAAaAdLH2gIR0BhWGnVG0/odX2UKGgGRwAAAAAAAAAAaAdLD2gIR0BhWZlcyFfzdX2UKGgGRz/wAAAAAAAAaAdLD2gIR0BhWo/keZG8dX2UKGgGRz/wAAAAAAAAaAdLHmgIR0BhXF4cFQl9dX2UKGgGRz/wAAAAAAAAaAdLCmgIR0BhXPp0OmSAdX2UKGgGRz/wAAAAAAAAaAdLB2gIR0BhXUlb/wRXdX2UKGgGRz/wAAAAAAAAaAdLJ2gIR0BhXxhpg1FZdX2UKGgGRwAAAAAAAAAAaAdLDGgIR0BhX5tix3V1dX2UKGgGRz/wAAAAAAAAaAdLF2gIR0BhYJx5s0pFdX2UKGgGRz/wAAAAAAAAaAdLGmgIR0BhYbhegL7XdX2UKGgGRz/wAAAAAAAAaAdLHmgIR0BhYwPI4lyBdX2UKGgGRz/wAAAAAAAAaAdLRWgIR0BhZenQ6ZH/dX2UKGgGRz/wAAAAAAAAaAdLEGgIR0BhZqESM98rdX2UKGgGRz/wAAAAAAAAaAdLE2gIR0BhZ2mtQsPKdX2UKGgGRz/wAAAAAAAAaAdLW2gIR0Bhavs9jgAIdX2UKGgGRwAAAAAAAAAAaAdLHmgIR0BhbEP1+RYBdX2UKGgGRwAAAAAAAAAAaAdLEWgIR0BhbSQNkOI7dX2UKGgGRz/wAAAAAAAAaAdLM2gIR0BhbzIHTqjadX2UKGgGRwAAAAAAAAAAaAdLB2gIR0Bhb5iRW912dX2UKGgGRwAAAAAAAAAAaAdLOGgIR0Bhceogmqo7dX2UKGgGRwAAAAAAAAAAaAdLZGgIR0Bhdb9VFQVLdX2UKGgGRwAAAAAAAAAAaAdLHGgIR0BhdtSflIVedX2UKGgGRz/wAAAAAAAAaAdLFWgIR0Bhd7PyCnP3dX2UKGgGRwAAAAAAAAAAaAdLZGgIR0BhfKCtihFmdX2UKGgGRz/wAAAAAAAAaAdLK2gIR0Bhfq00FbFCdX2UKGgGRwAAAAAAAAAAaAdLDWgIR0Bhf1xEORT1dX2UKGgGRz/wAAAAAAAAaAdLDmgIR0BhgEUsWfsedX2UKGgGRwAAAAAAAAAAaAdLEWgIR0BhgPxYq5LAdX2UKGgGRz/wAAAAAAAAaAdLMmgIR0Bhgyews5GSdX2UKGgGRz/wAAAAAAAAaAdLE2gIR0Bhg9dAxBVudX2UKGgGRz/wAAAAAAAAaAdLUmgIR0Bhh0ZtNzsAdX2UKGgGRwAAAAAAAAAAaAdLGGgIR0BhiF16mfoSdX2UKGgGRwAAAAAAAAAAaAdLNmgIR0BhipTQ3PzGdX2UKGgGRz/wAAAAAAAAaAdLJWgIR0BhjDEtNBWxdX2UKGgGRz/wAAAAAAAAaAdLGWgIR0BhjV+PRzBAdX2UKGgGRz/wAAAAAAAAaAdLZGgIR0Bhkbv1DjR2dX2UKGgGRz/wAAAAAAAAaAdLJWgIR0Bhk0gGKQ7tdX2UKGgGRz/wAAAAAAAAaAdLCWgIR0Bhk6Rhc7hfdX2UKGgGRz/wAAAAAAAAaAdLPWgIR0BhllkQPI4mdX2UKGgGRwAAAAAAAAAAaAdLLGgIR0BhmDtqpLmIdX2UKGgGRz/wAAAAAAAAaAdLFmgIR0BhmRzvJA+qdX2UKGgGRwAAAAAAAAAAaAdLCGgIR0BhmZElVtGedX2UKGgGRz/wAAAAAAAAaAdLHWgIR0BhmsW2w3YMdX2UKGgGRwAAAAAAAAAAaAdLFWgIR0Bhm9H6MzdldX2UKGgGRz/wAAAAAAAAaAdLJWgIR0BhnVRpDeCTdX2UKGgGRwAAAAAAAAAAaAdLXmgIR0BhoY9cKPXDdX2UKGgGRz/wAAAAAAAAaAdLNmgIR0BhpB6+nIhhdX2UKGgGRz/wAAAAAAAAaAdLKWgIR0Bhpc/r0J4TdX2UKGgGRwAAAAAAAAAAaAdLZGgIR0Bhqd2cJ+lTdX2UKGgGRwAAAAAAAAAAaAdLCGgIR0BhqjBInSfEdX2UKGgGRwAAAAAAAAAAaAdLMWgIR0BhrDulXRw7dX2UKGgGRwAAAAAAAAAAaAdLG2gIR0BhraVSn+AFdX2UKGgGRwAAAAAAAAAAaAdLD2gIR0BhrkzAN5MUdX2UKGgGRz/wAAAAAAAAaAdLDWgIR0BhrtCeEqUedX2UKGgGRz/wAAAAAAAAaAdLD2gIR0Bhr3ARChN/dX2UKGgGRz/wAAAAAAAAaAdLEGgIR0BhsBqqOtGNdX2UKGgGRwAAAAAAAAAAaAdLTGgIR0Bhsx8KG+K1dX2UKGgGRwAAAAAAAAAAaAdLE2gIR0Bhs9+NLlFMdX2UKGgGRz/wAAAAAAAAaAdLSmgIR0BhtvNLUTcqdX2UKGgGRz/wAAAAAAAAaAdLXWgIR0BhuqXv6TGHdX2UKGgGRwAAAAAAAAAAaAdLZGgIR0BhvsIPbwjMdX2UKGgGRwAAAAAAAAAAaAdLZGgIR0Bhw0ZHd43WdX2UKGgGRz/wAAAAAAAAaAdLNGgIR0BhxZuqFRHgdX2UKGgGRwAAAAAAAAAAaAdLEGgIR0BhxkMI/qxDdX2UKGgGRz/wAAAAAAAAaAdLE2gIR0BhxxQemvW6dX2UKGgGRz/wAAAAAAAAaAdLG2gIR0BhyDMmnfl7dX2UKGgGRz/wAAAAAAAAaAdLI2gIR0Bhyaj59E1EdX2UKGgGRwAAAAAAAAAAaAdLKGgIR0Bhy2SfUWl/dX2UKGgGRwAAAAAAAAAAaAdLL2gIR0BhzVjXnQpndX2UKGgGRwAAAAAAAAAAaAdLEmgIR0BhziBGx2SudX2UKGgGRwAAAAAAAAAAaAdLIGgIR0Bhz2psGgSOdWUu"
 },
 "ep_success_buffer": {
 ":type:": "<class 'collections.deque'>",
@@ -83,13 +83,13 @@
 "__module__": "stable_baselines3.common.buffers",
 "__annotations__": "{'observations': <class 'numpy.ndarray'>, 'next_observations': <class 'numpy.ndarray'>, 'actions': <class 'numpy.ndarray'>, 'rewards': <class 'numpy.ndarray'>, 'dones': <class 'numpy.ndarray'>, 'timeouts': <class 'numpy.ndarray'>}",
 "__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
-"__init__": "<function ReplayBuffer.__init__ at
-"add": "<function ReplayBuffer.add at
-"sample": "<function ReplayBuffer.sample at
-"_get_samples": "<function ReplayBuffer._get_samples at
-"_maybe_cast_dtype": "<staticmethod(<function ReplayBuffer._maybe_cast_dtype at
+"__init__": "<function ReplayBuffer.__init__ at 0x7f482b759120>",
+"add": "<function ReplayBuffer.add at 0x7f482b7591b0>",
+"sample": "<function ReplayBuffer.sample at 0x7f482b759240>",
+"_get_samples": "<function ReplayBuffer._get_samples at 0x7f482b7592d0>",
+"_maybe_cast_dtype": "<staticmethod(<function ReplayBuffer._maybe_cast_dtype at 0x7f482b759360>)>",
 "__abstractmethods__": "frozenset()",
-"_abc_impl": "<_abc._abc_data object at
+"_abc_impl": "<_abc._abc_data object at 0x7f482b8d8280>"
 },
 "replay_buffer_kwargs": {},
 "train_freq": {
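Most of the churn in `data` is cosmetic: the `<function ... at 0x...>` strings embed CPython memory addresses from the training process, so these lines differ on every run even when the code is identical. The substantive additions are the recorded `start_time`, `_episode_num` (3799), and the `ep_info_buffer`, a base64-encoded pickle of a deque holding up to the last 100 training episodes (stable-baselines3 stores one dict per episode with `r` = reward, `l` = length in steps, `t` = elapsed time). A sketch of inspecting it, assuming the archive layout shown above; note that unpickling executes code from the blob, so only do this for archives you trust:

```python
# Sketch: decode the ep_info_buffer from the model archive's "data" member.
# WARNING: pickle.loads can execute arbitrary code; only run on trusted files.
import base64
import json
import pickle
import statistics
import zipfile

with zipfile.ZipFile("dqn-FrozenLake-v1.zip") as zf:
    data = json.loads(zf.read("data"))  # the JSON file diffed above

blob = data["ep_info_buffer"][":serialized:"]
episodes = pickle.loads(base64.b64decode(blob))  # deque of {"r", "l", "t"} dicts
rewards = [ep["r"] for ep in episodes]
print(len(episodes), statistics.mean(rewards))  # recent training success rate
```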
dqn-FrozenLake-v1/policy.optimizer.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:817371219b382eb135a809046dcf654449d8fa7e48e735fea475e3c8698394cb
 size 49504
dqn-FrozenLake-v1/policy.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:71d447c3c0a9f4d63133039505de1b3af0fd065e76303af7af9755a5a65bb439
 size 48562
replay.mp4
CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
results.json
CHANGED
@@ -1 +1 @@
-{"mean_reward": 0.6, "std_reward": 0.4898979485566356, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2024-10-
+{"mean_reward": 0.6, "std_reward": 0.4898979485566356, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2024-10-03T15:47:13.874801"}
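The evaluation summary is internally consistent: FrozenLake-v1 pays a reward of 1 for reaching the goal and 0 otherwise, so a mean of 0.6 over 10 deterministic episodes means 6 successes, and the population standard deviation of such a 0/1 sample is sqrt(0.6 * 0.4) ≈ 0.4899, exactly the recorded std_reward. A quick stdlib check:

```python
# Sanity-check results.json: 6 successes out of 10 yields exactly these stats.
import json
import math

with open("results.json") as f:
    res = json.load(f)

p = res["mean_reward"]  # 0.6 -> 6 successes in n_eval_episodes = 10
assert math.isclose(res["std_reward"], math.sqrt(p * (1 - p)))  # population std of a 0/1 sample
```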
train_eval_metrics.zip
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:233a2796fe91f8b17b70265dac2a96e3c3c9b76e15fddac3e961dc83af3eb710
+size 68704