nukavarapusantoshkumar commited on
Commit
82ee7b4
·
verified ·
1 Parent(s): 19a6046

Initial commit

Browse files
args.yml CHANGED
@@ -56,7 +56,7 @@
56
  - - save_replay_buffer
57
  - false
58
  - - seed
59
- - 1029813955
60
  - - storage
61
  - null
62
  - - study_name
 
56
  - - save_replay_buffer
57
  - false
58
  - - seed
59
+ - 3106717846
60
  - - storage
61
  - null
62
  - - study_name
dqn-FrozenLake-v1.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bc98ec4f2095bfbbf878bcecb6adafd60d4a7b47e4cfb74e4c7409751004462
3
  size 115453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9167e7d3d13287f52535a22a1c77b466b58a363549a225ffae5d1fa855d860c
3
  size 115453
dqn-FrozenLake-v1/data CHANGED
@@ -5,15 +5,15 @@
5
  "__module__": "stable_baselines3.dqn.policies",
6
  "__annotations__": "{'q_net': <class 'stable_baselines3.dqn.policies.QNetwork'>, 'q_net_target': <class 'stable_baselines3.dqn.policies.QNetwork'>}",
7
  "__doc__": "\n Policy class with Q-Value Net and target net for DQN\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
8
- "__init__": "<function DQNPolicy.__init__ at 0x7fd7b6023910>",
9
- "_build": "<function DQNPolicy._build at 0x7fd7b60239a0>",
10
- "make_q_net": "<function DQNPolicy.make_q_net at 0x7fd7b6023a30>",
11
- "forward": "<function DQNPolicy.forward at 0x7fd7b6023ac0>",
12
- "_predict": "<function DQNPolicy._predict at 0x7fd7b6023b50>",
13
- "_get_constructor_parameters": "<function DQNPolicy._get_constructor_parameters at 0x7fd7b6023be0>",
14
- "set_training_mode": "<function DQNPolicy.set_training_mode at 0x7fd7b6023c70>",
15
  "__abstractmethods__": "frozenset()",
16
- "_abc_impl": "<_abc._abc_data object at 0x7fd7b603c3c0>"
17
  },
18
  "verbose": 1,
19
  "policy_kwargs": {},
@@ -22,7 +22,7 @@
22
  "_num_timesteps_at_start": 0,
23
  "seed": 0,
24
  "action_noise": null,
25
- "start_time": 1727961211373805109,
26
  "learning_rate": {
27
  ":type:": "<class 'function'>",
28
  ":serialized:": "gAWVxQIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMSS91c3IvbG9jYWwvbGliL3B5dGhvbjMuMTAvZGlzdC1wYWNrYWdlcy9zdGFibGVfYmFzZWxpbmVzMy9jb21tb24vdXRpbHMucHmUjARmdW5jlEuFQwIEAZSMA3ZhbJSFlCl0lFKUfZQojAtfX3BhY2thZ2VfX5SMGHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbpSMCF9fbmFtZV9flIwec3RhYmxlX2Jhc2VsaW5lczMuY29tbW9uLnV0aWxzlIwIX19maWxlX1+UjEkvdXNyL2xvY2FsL2xpYi9weXRob24zLjEwL2Rpc3QtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lHVOTmgAjBBfbWFrZV9lbXB0eV9jZWxslJOUKVKUhZR0lFKUjBxjbG91ZHBpY2tsZS5jbG91ZHBpY2tsZV9mYXN0lIwSX2Z1bmN0aW9uX3NldHN0YXRllJOUaB99lH2UKGgWaA2MDF9fcXVhbG5hbWVfX5SMGWNvbnN0YW50X2ZuLjxsb2NhbHM+LmZ1bmOUjA9fX2Fubm90YXRpb25zX1+UfZSMDl9fa3dkZWZhdWx0c19flE6MDF9fZGVmYXVsdHNfX5ROjApfX21vZHVsZV9flGgXjAdfX2RvY19flE6MC19fY2xvc3VyZV9flGgAjApfbWFrZV9jZWxslJOURz8aNuLrHEMthZRSlIWUjBdfY2xvdWRwaWNrbGVfc3VibW9kdWxlc5RdlIwLX19nbG9iYWxzX1+UfZR1hpSGUjAu"
@@ -37,14 +37,14 @@
37
  ":type:": "<class 'numpy.ndarray'>",
38
  ":serialized:": "gAWVewAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYIAAAAAAAAAAQAAAAAAAAAlIwFbnVtcHmUjAVkdHlwZZSTlIwCaTiUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksBhZSMAUOUdJRSlC4="
39
  },
40
- "_episode_num": 3890,
41
  "use_sde": false,
42
  "sde_sample_freq": -1,
43
  "_current_progress_remaining": 0.0,
44
  "_stats_window_size": 100,
45
  "ep_info_buffer": {
46
  ":type:": "<class 'collections.deque'>",
47
- ":serialized:": "gAWV4AsAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHAAAAAAAAAACMAWyUSw2MAXSUR0BgtLoUzsQedX2UKGgGRwAAAAAAAAAAaAdLBmgIR0BgtRV6u4gBdX2UKGgGRwAAAAAAAAAAaAdLHGgIR0BgtrGJemeldX2UKGgGRwAAAAAAAAAAaAdLE2gIR0Bgt7pcHGCJdX2UKGgGRwAAAAAAAAAAaAdLJWgIR0BgucTrVvuPdX2UKGgGRwAAAAAAAAAAaAdLHGgIR0Bgu2tITXardX2UKGgGRz/wAAAAAAAAaAdLXWgIR0BgwNVWCEpRdX2UKGgGRwAAAAAAAAAAaAdLWmgIR0BgxlQ0oBq9dX2UKGgGRz/wAAAAAAAAaAdLFmgIR0Bgx6CJ40MxdX2UKGgGRz/wAAAAAAAAaAdLDGgIR0BgyEkyDZlGdX2UKGgGRz/wAAAAAAAAaAdLCWgIR0BgyMCmuTzNdX2UKGgGRwAAAAAAAAAAaAdLGmgIR0Bgyi8J2MbWdX2UKGgGRwAAAAAAAAAAaAdLDmgIR0Bgyu/JvHcUdX2UKGgGRz/wAAAAAAAAaAdLImgIR0BgzNCiRGMGdX2UKGgGRz/wAAAAAAAAaAdLH2gIR0BgznQWvbGndX2UKGgGRwAAAAAAAAAAaAdLZGgIR0Bg1CufVZs9dX2UKGgGRz/wAAAAAAAAaAdLNGgIR0Bg1vnfVI7OdX2UKGgGRz/wAAAAAAAAaAdLD2gIR0Bg18BMi8nNdX2UKGgGRz/wAAAAAAAAaAdLE2gIR0Bg2OscQyyldX2UKGgGRz/wAAAAAAAAaAdLOmgIR0Bg3CPOpsGgdX2UKGgGRwAAAAAAAAAAaAdLG2gIR0Bg3YZ4wAU+dX2UKGgGRz/wAAAAAAAAaAdLUGgIR0Bg4eXTmW+odX2UKGgGRwAAAAAAAAAAaAdLImgIR0Bg48RtgrpadX2UKGgGRz/wAAAAAAAAaAdLB2gIR0Bg5CnvUjLTdX2UKGgGRwAAAAAAAAAAaAdLZGgIR0Bg6cBOpKjBdX2UKGgGRwAAAAAAAAAAaAdLZGgIR0Bg73c32mHhdX2UKGgGRz/wAAAAAAAAaAdLE2gIR0Bg8K57PY4AdX2UKGgGRz/wAAAAAAAAaAdLFmgIR0Bg8g6bONYKdX2UKGgGRwAAAAAAAAAAaAdLEWgIR0Bg8wQBgeA/dX2UKGgGRz/wAAAAAAAAaAdLNmgIR0Bg9ZTho/RmdX2UKGgGRz/wAAAAAAAAaAdLLWgIR0Bg95zaK1ohdX2UKGgGRz/wAAAAAAAAaAdLN2gIR0Bg+eTHKfWddX2UKGgGRz/wAAAAAAAAaAdLFWgIR0Bg+rIV/MGHdX2UKGgGRz/wAAAAAAAAaAdLEWgIR0Bg+1i2DxsmdX2UKGgGRz/wAAAAAAAAaAdLF2gIR0Bg/FX7tRekdX2UKGgGRz/wAAAAAAAAaAdLD2gIR0Bg/O4/eLvUdX2UKGgGRwAAAAAAAAAAaAdLD2gIR0Bg/ZTIeYD1dX2UKGgGRz/wAAAAAAAAaAdLDGgIR0Bg/hSJj2BbdX2UKGgGRz/wAAAAAAAAaAdLCmgIR0Bg/odGRV6vdX2UKGgGRwAAAAAAAAAAaAdLE2gIR0Bg/1Jtix3WdX2UKGgGRwAAAAAAAAAAaAdLPGgIR0BhAZwZOzppdX2UKGgGRwAAAAAAAAAAaAdLKGgIR0BhAyM3qAz6dX2UKGgGRwAAAAAAAAAAaAdLEmgIR0BhA80DU3GXdX2UKGgGRwAAAAAAAAAAaAdLJGgIR0BhBVpPAO8TdX2UKGgGRz/wAAAAAAAAaAdLCWgIR0BhBbI91U2ldX2UKGgGRz/wAAAAAAAAaAdLEmgIR0BhBm6qbSZ0dX2UKGgGRwAAAAAAAAAAaAdLCGgIR0BhBsAtFrmAdX2UKGgGRwAAAAAAAAAAaAdLBmgIR0BhBvPRiPQwdX2UKGgGRwAAAAAAAAAAaAdLFGgIR0BhB8jiXIEKdX2UKGgGRwAAAAAAAAAAaAdLFmgIR0BhCOAI6bONdX2UKGgGRz/wAAAAAAAAaAdLIGgIR0BhClJaq0dBdX2UKGgGRwAAAAAAAAAAaAdLJGgIR0BhDDI1cdHUdX2UKGgGRz/wAAAAAAAAaAdLJWgIR0BhDiDkELYxdX2UKGgGRwAAAAAAAAAAaAdLDmgIR0BhDuqrBCUpdX2UKGgGRwAAAAAAAAAAaAdLHmgIR0BhEBhttQ9BdX2UKGgGRz/wAAAAAAAAaAdLI2gIR0BhEYjUutfYdX2UKGgGRz/wAAAAAAAAaAdLFWgIR0BhElm8M/hVdX2UKGgGRz/wAAAAAAAAaAdLJWgIR0BhE+mUGFBZdX2UKGgGRz/wAAAAAAAAaAdLOGgIR0BhFiO5rgwXdX2UKGgGRwAAAAAAAAAAaAdLI2gIR0BhF7tNSIgvdX2UKGgGRwAAAAAAAAAAaAdLN2gIR0BhGgN5MURGdX2UKGgGRwAAAAAAAAAAaAdLEGgIR0BhGqyD7IkrdX2UKGgGRwAAAAAAAAAAaAdLWmgIR0BhHmdCmdiEdX2UKGgGRwAAAAAAAAAAaAdLUWgIR0BhIYh2W6bwdX2UKGgGRz/wAAAAAAAAaAdLGGgIR0BhInT1CgK4dX2UKGgGRwAAAAAAAAAAaAdLZGgIR0BhJnvOQhfTdX2UKGgGRwAAAAAAAAAAaAdLIGgIR0BhJ8NvwVj7dX2UKGgGRwAAAAAAAAAAaAdLFmgIR0BhKKSDAaegdX2UKGgGRwAAAAAAAAAAaAdLC2gIR0BhKS/XXiBHdX2UKGgGRwAAAAAAAAAAaAdLMWgIR0BhK1UMoc7ydX2UKGgGRwAAAAAAAAAAaAdLK2gIR0BhLQKneiztdX2UKGgGRz/wAAAAAAAAaAdLEWgIR0BhLaqhlDnedX2UKGgGRz/wAAAAAAAAaAdLKGgIR0BhL3t6X0GvdX2UKGgGRz/wAAAAAAAAaAdLFWgIR0BhMEVQAMlUdX2UKGgGRz/wAAAAAAAAaAdLHWgIR0BhMY8hcJMQdX2UKGgGRwAAAAAAAAAAaAdLMGgIR0BhM4ekpI+XdX2UKGgGRwAAAAAAAAAAaAdLHWgIR0BhNPOv+wTudX2UKGgGRwAAAAAAAAAAaAdLOWgIR0BhNx6a9bosdX2UKGgGRwAAAAAAAAAAaAdLCmgIR0BhN5ClabF1dX2UKGgGRz/wAAAAAAAAaAdLB2gIR0BhN8f/3nIRdX2UKGgGRwAAAAAAAAAAaAdLImgIR0BhOVE9dNWVdX2UKGgGRwAAAAAAAAAAaAdLEmgIR0BhOgomXw9adX2UKGgGRz/wAAAAAAAAaAdLC2gIR0BhOn9LpRoAdX2UKGgGRz/wAAAAAAAAaAdLK2gIR0BhPDHXEqDsdX2UKGgGRwAAAAAAAAAAaAdLC2gIR0BhPKWX1J18dX2UKGgGRz/wAAAAAAAAaAdLNmgIR0BhPuF8G9pRdX2UKGgGRz/wAAAAAAAAaAdLSmgIR0BhQdaW5YozdX2UKGgGRz/wAAAAAAAAaAdLDmgIR0BhQmHDaXa8dX2UKGgGRwAAAAAAAAAAaAdLDmgIR0BhQxJ7LMcIdX2UKGgGRz/wAAAAAAAAaAdLImgIR0BhRFepn6EbdX2UKGgGRwAAAAAAAAAAaAdLDGgIR0BhRNCZ4Oc2dX2UKGgGRz/wAAAAAAAAaAdLV2gIR0BhSHD+BH09dX2UKGgGRz/wAAAAAAAAaAdLBmgIR0BhSKNZNfw7dX2UKGgGRwAAAAAAAAAAaAdLFGgIR0BhSWmtQsPKdX2UKGgGRwAAAAAAAAAAaAdLGGgIR0BhSlqFh5PedX2UKGgGRz/wAAAAAAAAaAdLKWgIR0BhTE2UB4lhdX2UKGgGRz/wAAAAAAAAaAdLHGgIR0BhTXZqVQhwdX2UKGgGRwAAAAAAAAAAaAdLEmgIR0BhThybQTmGdX2UKGgGRz/wAAAAAAAAaAdLEWgIR0BhTr3yqdYodX2UKGgGRz/wAAAAAAAAaAdLO2gIR0BhUVIwudwvdWUu"
48
  },
49
  "ep_success_buffer": {
50
  ":type:": "<class 'collections.deque'>",
@@ -83,13 +83,13 @@
83
  "__module__": "stable_baselines3.common.buffers",
84
  "__annotations__": "{'observations': <class 'numpy.ndarray'>, 'next_observations': <class 'numpy.ndarray'>, 'actions': <class 'numpy.ndarray'>, 'rewards': <class 'numpy.ndarray'>, 'dones': <class 'numpy.ndarray'>, 'timeouts': <class 'numpy.ndarray'>}",
85
  "__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
86
- "__init__": "<function ReplayBuffer.__init__ at 0x7fd7b6169120>",
87
- "add": "<function ReplayBuffer.add at 0x7fd7b61691b0>",
88
- "sample": "<function ReplayBuffer.sample at 0x7fd7b6169240>",
89
- "_get_samples": "<function ReplayBuffer._get_samples at 0x7fd7b61692d0>",
90
- "_maybe_cast_dtype": "<staticmethod(<function ReplayBuffer._maybe_cast_dtype at 0x7fd7b6169360>)>",
91
  "__abstractmethods__": "frozenset()",
92
- "_abc_impl": "<_abc._abc_data object at 0x7fd7b62ecd80>"
93
  },
94
  "replay_buffer_kwargs": {},
95
  "train_freq": {
 
5
  "__module__": "stable_baselines3.dqn.policies",
6
  "__annotations__": "{'q_net': <class 'stable_baselines3.dqn.policies.QNetwork'>, 'q_net_target': <class 'stable_baselines3.dqn.policies.QNetwork'>}",
7
  "__doc__": "\n Policy class with Q-Value Net and target net for DQN\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
8
+ "__init__": "<function DQNPolicy.__init__ at 0x7f482b60f910>",
9
+ "_build": "<function DQNPolicy._build at 0x7f482b60f9a0>",
10
+ "make_q_net": "<function DQNPolicy.make_q_net at 0x7f482b60fa30>",
11
+ "forward": "<function DQNPolicy.forward at 0x7f482b60fac0>",
12
+ "_predict": "<function DQNPolicy._predict at 0x7f482b60fb50>",
13
+ "_get_constructor_parameters": "<function DQNPolicy._get_constructor_parameters at 0x7f482b60fbe0>",
14
+ "set_training_mode": "<function DQNPolicy.set_training_mode at 0x7f482b60fc70>",
15
  "__abstractmethods__": "frozenset()",
16
+ "_abc_impl": "<_abc._abc_data object at 0x7f482b624c80>"
17
  },
18
  "verbose": 1,
19
  "policy_kwargs": {},
 
22
  "_num_timesteps_at_start": 0,
23
  "seed": 0,
24
  "action_noise": null,
25
+ "start_time": 1727970156559401634,
26
  "learning_rate": {
27
  ":type:": "<class 'function'>",
28
  ":serialized:": "gAWVxQIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMSS91c3IvbG9jYWwvbGliL3B5dGhvbjMuMTAvZGlzdC1wYWNrYWdlcy9zdGFibGVfYmFzZWxpbmVzMy9jb21tb24vdXRpbHMucHmUjARmdW5jlEuFQwIEAZSMA3ZhbJSFlCl0lFKUfZQojAtfX3BhY2thZ2VfX5SMGHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbpSMCF9fbmFtZV9flIwec3RhYmxlX2Jhc2VsaW5lczMuY29tbW9uLnV0aWxzlIwIX19maWxlX1+UjEkvdXNyL2xvY2FsL2xpYi9weXRob24zLjEwL2Rpc3QtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lHVOTmgAjBBfbWFrZV9lbXB0eV9jZWxslJOUKVKUhZR0lFKUjBxjbG91ZHBpY2tsZS5jbG91ZHBpY2tsZV9mYXN0lIwSX2Z1bmN0aW9uX3NldHN0YXRllJOUaB99lH2UKGgWaA2MDF9fcXVhbG5hbWVfX5SMGWNvbnN0YW50X2ZuLjxsb2NhbHM+LmZ1bmOUjA9fX2Fubm90YXRpb25zX1+UfZSMDl9fa3dkZWZhdWx0c19flE6MDF9fZGVmYXVsdHNfX5ROjApfX21vZHVsZV9flGgXjAdfX2RvY19flE6MC19fY2xvc3VyZV9flGgAjApfbWFrZV9jZWxslJOURz8aNuLrHEMthZRSlIWUjBdfY2xvdWRwaWNrbGVfc3VibW9kdWxlc5RdlIwLX19nbG9iYWxzX1+UfZR1hpSGUjAu"
 
37
  ":type:": "<class 'numpy.ndarray'>",
38
  ":serialized:": "gAWVewAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYIAAAAAAAAAAQAAAAAAAAAlIwFbnVtcHmUjAVkdHlwZZSTlIwCaTiUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksBhZSMAUOUdJRSlC4="
39
  },
40
+ "_episode_num": 3799,
41
  "use_sde": false,
42
  "sde_sample_freq": -1,
43
  "_current_progress_remaining": 0.0,
44
  "_stats_window_size": 100,
45
  "ep_info_buffer": {
46
  ":type:": "<class 'collections.deque'>",
47
+ ":serialized:": "gAWV4AsAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHAAAAAAAAAACMAWyUSxaMAXSUR0BhI65Etuk2dX2UKGgGRwAAAAAAAAAAaAdLF2gIR0BhJR4jbBXTdX2UKGgGRwAAAAAAAAAAaAdLDWgIR0BhJd1hb4ahdX2UKGgGRwAAAAAAAAAAaAdLCWgIR0BhJnttygf2dX2UKGgGRwAAAAAAAAAAaAdLJGgIR0BhKH2Xb/OudX2UKGgGRz/wAAAAAAAAaAdLJGgIR0BhKp1FH8TBdX2UKGgGRwAAAAAAAAAAaAdLNmgIR0BhLcmY0EX+dX2UKGgGRz/wAAAAAAAAaAdLIWgIR0BhL6xTsIE9dX2UKGgGRz/wAAAAAAAAaAdLIWgIR0BhMYF/x2B8dX2UKGgGRwAAAAAAAAAAaAdLIGgIR0BhMz4cm0E6dX2UKGgGRz/wAAAAAAAAaAdLNmgIR0BhNg8fV7QcdX2UKGgGRz/wAAAAAAAAaAdLIGgIR0BhN8PFvQ4TdX2UKGgGRwAAAAAAAAAAaAdLY2gIR0BhPgIdELH/dX2UKGgGRz/wAAAAAAAAaAdLO2gIR0BhQVwNsnAqdX2UKGgGRwAAAAAAAAAAaAdLEWgIR0BhQoYixFAndX2UKGgGRz/wAAAAAAAAaAdLDmgIR0BhQz9OymhudX2UKGgGRwAAAAAAAAAAaAdLDmgIR0BhRBvo/zJ7dX2UKGgGRwAAAAAAAAAAaAdLB2gIR0BhRIbsF+uvdX2UKGgGRz/wAAAAAAAAaAdLIGgIR0BhRolMRHwxdX2UKGgGRz/wAAAAAAAAaAdLGWgIR0BhR/MjeKsNdX2UKGgGRwAAAAAAAAAAaAdLDmgIR0BhSKfYjB2wdX2UKGgGRz/wAAAAAAAAaAdLMWgIR0BhS3QdCE6DdX2UKGgGRwAAAAAAAAAAaAdLH2gIR0BhTUqDsdDIdX2UKGgGRz/wAAAAAAAAaAdLC2gIR0BhTeiSJTESdX2UKGgGRz/wAAAAAAAAaAdLL2gIR0BhUMRFqi48dX2UKGgGRz/wAAAAAAAAaAdLEGgIR0BhUacmShaldX2UKGgGRz/wAAAAAAAAaAdLH2gIR0BhU03qAz55dX2UKGgGRz/wAAAAAAAAaAdLH2gIR0BhVOzD4xk/dX2UKGgGRz/wAAAAAAAAaAdLHWgIR0BhVrEvTPSldX2UKGgGRwAAAAAAAAAAaAdLH2gIR0BhWGnVG0/odX2UKGgGRwAAAAAAAAAAaAdLD2gIR0BhWZlcyFfzdX2UKGgGRz/wAAAAAAAAaAdLD2gIR0BhWo/keZG8dX2UKGgGRz/wAAAAAAAAaAdLHmgIR0BhXF4cFQl9dX2UKGgGRz/wAAAAAAAAaAdLCmgIR0BhXPp0OmSAdX2UKGgGRz/wAAAAAAAAaAdLB2gIR0BhXUlb/wRXdX2UKGgGRz/wAAAAAAAAaAdLJ2gIR0BhXxhpg1FZdX2UKGgGRwAAAAAAAAAAaAdLDGgIR0BhX5tix3V1dX2UKGgGRz/wAAAAAAAAaAdLF2gIR0BhYJx5s0pFdX2UKGgGRz/wAAAAAAAAaAdLGmgIR0BhYbhegL7XdX2UKGgGRz/wAAAAAAAAaAdLHmgIR0BhYwPI4lyBdX2UKGgGRz/wAAAAAAAAaAdLRWgIR0BhZenQ6ZH/dX2UKGgGRz/wAAAAAAAAaAdLEGgIR0BhZqESM98rdX2UKGgGRz/wAAAAAAAAaAdLE2gIR0BhZ2mtQsPKdX2UKGgGRz/wAAAAAAAAaAdLW2gIR0Bhavs9jgAIdX2UKGgGRwAAAAAAAAAAaAdLHmgIR0BhbEP1+RYBdX2UKGgGRwAAAAAAAAAAaAdLEWgIR0BhbSQNkOI7dX2UKGgGRz/wAAAAAAAAaAdLM2gIR0BhbzIHTqjadX2UKGgGRwAAAAAAAAAAaAdLB2gIR0Bhb5iRW912dX2UKGgGRwAAAAAAAAAAaAdLOGgIR0Bhceogmqo7dX2UKGgGRwAAAAAAAAAAaAdLZGgIR0Bhdb9VFQVLdX2UKGgGRwAAAAAAAAAAaAdLHGgIR0BhdtSflIVedX2UKGgGRz/wAAAAAAAAaAdLFWgIR0Bhd7PyCnP3dX2UKGgGRwAAAAAAAAAAaAdLZGgIR0BhfKCtihFmdX2UKGgGRz/wAAAAAAAAaAdLK2gIR0Bhfq00FbFCdX2UKGgGRwAAAAAAAAAAaAdLDWgIR0Bhf1xEORT1dX2UKGgGRz/wAAAAAAAAaAdLDmgIR0BhgEUsWfsedX2UKGgGRwAAAAAAAAAAaAdLEWgIR0BhgPxYq5LAdX2UKGgGRz/wAAAAAAAAaAdLMmgIR0Bhgyews5GSdX2UKGgGRz/wAAAAAAAAaAdLE2gIR0Bhg9dAxBVudX2UKGgGRz/wAAAAAAAAaAdLUmgIR0Bhh0ZtNzsAdX2UKGgGRwAAAAAAAAAAaAdLGGgIR0BhiF16mfoSdX2UKGgGRwAAAAAAAAAAaAdLNmgIR0BhipTQ3PzGdX2UKGgGRz/wAAAAAAAAaAdLJWgIR0BhjDEtNBWxdX2UKGgGRz/wAAAAAAAAaAdLGWgIR0BhjV+PRzBAdX2UKGgGRz/wAAAAAAAAaAdLZGgIR0Bhkbv1DjR2dX2UKGgGRz/wAAAAAAAAaAdLJWgIR0Bhk0gGKQ7tdX2UKGgGRz/wAAAAAAAAaAdLCWgIR0Bhk6Rhc7hfdX2UKGgGRz/wAAAAAAAAaAdLPWgIR0BhllkQPI4mdX2UKGgGRwAAAAAAAAAAaAdLLGgIR0BhmDtqpLmIdX2UKGgGRz/wAAAAAAAAaAdLFmgIR0BhmRzvJA+qdX2UKGgGRwAAAAAAAAAAaAdLCGgIR0BhmZElVtGedX2UKGgGRz/wAAAAAAAAaAdLHWgIR0BhmsW2w3YMdX2UKGgGRwAAAAAAAAAAaAdLFWgIR0Bhm9H6MzdldX2UKGgGRz/wAAAAAAAAaAdLJWgIR0BhnVRpDeCTdX2UKGgGRwAAAAAAAAAAaAdLXmgIR0BhoY9cKPXDdX2UKGgGRz/wAAAAAAAAaAdLNmgIR0BhpB6+nIhhdX2UKGgGRz/wAAAAAAAAaAdLKWgIR0Bhpc/r0J4TdX2UKGgGRwAAAAAAAAAAaAdLZGgIR0Bhqd2cJ+lTdX2UKGgGRwAAAAAAAAAAaAdLCGgIR0BhqjBInSfEdX2UKGgGRwAAAAAAAAAAaAdLMWgIR0BhrDulXRw7dX2UKGgGRwAAAAAAAAAAaAdLG2gIR0BhraVSn+AFdX2UKGgGRwAAAAAAAAAAaAdLD2gIR0BhrkzAN5MUdX2UKGgGRz/wAAAAAAAAaAdLDWgIR0BhrtCeEqUedX2UKGgGRz/wAAAAAAAAaAdLD2gIR0Bhr3ARChN/dX2UKGgGRz/wAAAAAAAAaAdLEGgIR0BhsBqqOtGNdX2UKGgGRwAAAAAAAAAAaAdLTGgIR0Bhsx8KG+K1dX2UKGgGRwAAAAAAAAAAaAdLE2gIR0Bhs9+NLlFMdX2UKGgGRz/wAAAAAAAAaAdLSmgIR0BhtvNLUTcqdX2UKGgGRz/wAAAAAAAAaAdLXWgIR0BhuqXv6TGHdX2UKGgGRwAAAAAAAAAAaAdLZGgIR0BhvsIPbwjMdX2UKGgGRwAAAAAAAAAAaAdLZGgIR0Bhw0ZHd43WdX2UKGgGRz/wAAAAAAAAaAdLNGgIR0BhxZuqFRHgdX2UKGgGRwAAAAAAAAAAaAdLEGgIR0BhxkMI/qxDdX2UKGgGRz/wAAAAAAAAaAdLE2gIR0BhxxQemvW6dX2UKGgGRz/wAAAAAAAAaAdLG2gIR0BhyDMmnfl7dX2UKGgGRz/wAAAAAAAAaAdLI2gIR0Bhyaj59E1EdX2UKGgGRwAAAAAAAAAAaAdLKGgIR0Bhy2SfUWl/dX2UKGgGRwAAAAAAAAAAaAdLL2gIR0BhzVjXnQpndX2UKGgGRwAAAAAAAAAAaAdLEmgIR0BhziBGx2SudX2UKGgGRwAAAAAAAAAAaAdLIGgIR0Bhz2psGgSOdWUu"
48
  },
49
  "ep_success_buffer": {
50
  ":type:": "<class 'collections.deque'>",
 
83
  "__module__": "stable_baselines3.common.buffers",
84
  "__annotations__": "{'observations': <class 'numpy.ndarray'>, 'next_observations': <class 'numpy.ndarray'>, 'actions': <class 'numpy.ndarray'>, 'rewards': <class 'numpy.ndarray'>, 'dones': <class 'numpy.ndarray'>, 'timeouts': <class 'numpy.ndarray'>}",
85
  "__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
86
+ "__init__": "<function ReplayBuffer.__init__ at 0x7f482b759120>",
87
+ "add": "<function ReplayBuffer.add at 0x7f482b7591b0>",
88
+ "sample": "<function ReplayBuffer.sample at 0x7f482b759240>",
89
+ "_get_samples": "<function ReplayBuffer._get_samples at 0x7f482b7592d0>",
90
+ "_maybe_cast_dtype": "<staticmethod(<function ReplayBuffer._maybe_cast_dtype at 0x7f482b759360>)>",
91
  "__abstractmethods__": "frozenset()",
92
+ "_abc_impl": "<_abc._abc_data object at 0x7f482b8d8280>"
93
  },
94
  "replay_buffer_kwargs": {},
95
  "train_freq": {
dqn-FrozenLake-v1/policy.optimizer.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b79ba8f459bb40f42895d672a5b12bf87986e984ad8135a5fb69fef7f1b8d84
3
  size 49504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:817371219b382eb135a809046dcf654449d8fa7e48e735fea475e3c8698394cb
3
  size 49504
dqn-FrozenLake-v1/policy.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b795b196dfff930e33349c181d1ced9e93df75777827ee8f70ac99abbb696ce6
3
  size 48562
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71d447c3c0a9f4d63133039505de1b3af0fd065e76303af7af9755a5a65bb439
3
  size 48562
replay.mp4 CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
 
results.json CHANGED
@@ -1 +1 @@
1
- {"mean_reward": 0.6, "std_reward": 0.4898979485566356, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2024-10-03T13:20:47.174209"}
 
1
+ {"mean_reward": 0.6, "std_reward": 0.4898979485566356, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2024-10-03T15:47:13.874801"}
train_eval_metrics.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:625866ed4769eecb3688cce38834aef9a4511e9ae8c63b49b736936388d8a79a
3
- size 70384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:233a2796fe91f8b17b70265dac2a96e3c3c9b76e15fddac3e961dc83af3eb710
3
+ size 68704