sgoodfriend
committed on
Commit
·
973f46d
1
Parent(s):
35bb3c8
PPO playing QbertNoFrameskip-v4 from https://github.com/sgoodfriend/rl-algo-impls/tree/e47a44c4d891f48885af0b1605b30d19fc67b5af
Browse files
- README.md +10 -10
- benchmark_publish.py +19 -2
- huggingface_publish.py +7 -2
- replay.meta.json +1 -1
- replay.mp4 +0 -0
README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
---
|
2 |
library_name: rl-algo-impls
|
3 |
tags:
|
4 |
-
-
|
5 |
- ppo
|
6 |
- deep-reinforcement-learning
|
7 |
- reinforcement-learning
|
@@ -10,18 +10,18 @@ model-index:
|
|
10 |
results:
|
11 |
- metrics:
|
12 |
- type: mean_reward
|
13 |
-
value:
|
14 |
name: mean_reward
|
15 |
task:
|
16 |
type: reinforcement-learning
|
17 |
name: reinforcement-learning
|
18 |
dataset:
|
19 |
-
name:
|
20 |
-
type:
|
21 |
---
|
22 |
-
# **PPO** Agent playing **
|
23 |
|
24 |
-
This is a trained model of a **PPO** agent playing **
|
25 |
|
26 |
All models trained at this commit can be found at https://api.wandb.ai/links/sgoodfriend/v4wd7cp5.
|
27 |
|
@@ -31,9 +31,9 @@ This model was trained from 3 trainings of **PPO** agents using different initia
|
|
31 |
|
32 |
| algo | env | seed | reward_mean | reward_std | eval_episodes | best | wandb_url |
|
33 |
|:-------|:--------------------|-------:|--------------:|-------------:|----------------:|:-------|:-----------------------------------------------------------------------------|
|
34 |
-
| ppo | QbertNoFrameskip-v4 | 1 |
|
35 |
-
| ppo | QbertNoFrameskip-v4 | 2 |
|
36 |
-
| ppo | QbertNoFrameskip-v4 | 3 |
|
37 |
|
38 |
|
39 |
### Prerequisites: Weights & Biases (WandB)
|
@@ -72,7 +72,7 @@ commit the agent was trained on: [e47a44c](https://github.com/sgoodfriend/rl-alg
|
|
72 |
training is deterministic, different hardware will give different results.
|
73 |
|
74 |
```
|
75 |
-
python train.py --algo ppo --env
|
76 |
```
|
77 |
|
78 |
Setup hasn't been completely worked out yet, so you might be best served by using Google
|
|
|
1 |
---
|
2 |
library_name: rl-algo-impls
|
3 |
tags:
|
4 |
+
- QbertNoFrameskip-v4
|
5 |
- ppo
|
6 |
- deep-reinforcement-learning
|
7 |
- reinforcement-learning
|
|
|
10 |
results:
|
11 |
- metrics:
|
12 |
- type: mean_reward
|
13 |
+
value: 14873.44 +/- 1014.52
|
14 |
name: mean_reward
|
15 |
task:
|
16 |
type: reinforcement-learning
|
17 |
name: reinforcement-learning
|
18 |
dataset:
|
19 |
+
name: QbertNoFrameskip-v4
|
20 |
+
type: QbertNoFrameskip-v4
|
21 |
---
|
22 |
+
# **PPO** Agent playing **QbertNoFrameskip-v4**
|
23 |
|
24 |
+
This is a trained model of a **PPO** agent playing **QbertNoFrameskip-v4** using the [/sgoodfriend/rl-algo-impls](https://github.com/sgoodfriend/rl-algo-impls) repo.
|
25 |
|
26 |
All models trained at this commit can be found at https://api.wandb.ai/links/sgoodfriend/v4wd7cp5.
|
27 |
|
|
|
31 |
|
32 |
| algo | env | seed | reward_mean | reward_std | eval_episodes | best | wandb_url |
|
33 |
|:-------|:--------------------|-------:|--------------:|-------------:|----------------:|:-------|:-----------------------------------------------------------------------------|
|
34 |
+
| ppo | QbertNoFrameskip-v4 | 1 | 12565.6 | 3873.01 | 16 | | [wandb](https://wandb.ai/sgoodfriend/rl-algo-impls-benchmarks/runs/2oimcnrm) |
|
35 |
+
| ppo | QbertNoFrameskip-v4 | 2 | 14873.4 | 1014.52 | 16 | * | [wandb](https://wandb.ai/sgoodfriend/rl-algo-impls-benchmarks/runs/cd50ih0p) |
|
36 |
+
| ppo | QbertNoFrameskip-v4 | 3 | 14429.7 | 3595.16 | 16 | | [wandb](https://wandb.ai/sgoodfriend/rl-algo-impls-benchmarks/runs/oiccxep8) |
|
37 |
|
38 |
|
39 |
### Prerequisites: Weights & Biases (WandB)
|
|
|
72 |
training is deterministic, different hardware will give different results.
|
73 |
|
74 |
```
|
75 |
+
python train.py --algo ppo --env QbertNoFrameskip-v4 --seed 2
|
76 |
```
|
77 |
|
78 |
Setup hasn't been completely worked out yet, so you might be best served by using Google
|
benchmark_publish.py
CHANGED
@@ -32,6 +32,12 @@ if __name__ == "__main__":
|
|
32 |
parser.add_argument(
|
33 |
"--envs", type=str, nargs="*", help="Optional filter down to these envs"
|
34 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
parser.add_argument(
|
36 |
"--huggingface-user",
|
37 |
type=str,
|
@@ -44,9 +50,14 @@ if __name__ == "__main__":
|
|
44 |
default=3,
|
45 |
help="How many publish jobs can run in parallel",
|
46 |
)
|
|
|
|
|
|
|
47 |
# parser.set_defaults(
|
48 |
-
# wandb_tags=["
|
49 |
-
# wandb_report_url="https://api.wandb.ai/links/sgoodfriend/
|
|
|
|
|
50 |
# )
|
51 |
args = parser.parse_args()
|
52 |
print(args)
|
@@ -65,10 +76,14 @@ if __name__ == "__main__":
|
|
65 |
|
66 |
runs_paths_by_group = defaultdict(list)
|
67 |
for r in runs:
|
|
|
|
|
68 |
algo = r.config["algo"]
|
69 |
env = r.config["env"]
|
70 |
if args.envs and env not in args.envs:
|
71 |
continue
|
|
|
|
|
72 |
run_group = RunGroup(algo, env)
|
73 |
runs_paths_by_group[run_group].append("/".join(r.path))
|
74 |
|
@@ -81,6 +96,8 @@ if __name__ == "__main__":
|
|
81 |
if args.huggingface_user:
|
82 |
publish_args.append("--huggingface-user")
|
83 |
publish_args.append(args.huggingface_user)
|
|
|
|
|
84 |
subprocess.run(publish_args)
|
85 |
|
86 |
tp = ThreadPool(args.pool_size)
|
|
|
32 |
parser.add_argument(
|
33 |
"--envs", type=str, nargs="*", help="Optional filter down to these envs"
|
34 |
)
|
35 |
+
parser.add_argument(
|
36 |
+
"--exclude-envs",
|
37 |
+
type=str,
|
38 |
+
nargs="*",
|
39 |
+
help="Environments to exclude from publishing",
|
40 |
+
)
|
41 |
parser.add_argument(
|
42 |
"--huggingface-user",
|
43 |
type=str,
|
|
|
50 |
default=3,
|
51 |
help="How many publish jobs can run in parallel",
|
52 |
)
|
53 |
+
parser.add_argument(
|
54 |
+
"--virtual-display", action="store_true", help="Use headless virtual display"
|
55 |
+
)
|
56 |
# parser.set_defaults(
|
57 |
+
# wandb_tags=["benchmark_e47a44c", "host_129-146-2-230"],
|
58 |
+
# wandb_report_url="https://api.wandb.ai/links/sgoodfriend/v4wd7cp5",
|
59 |
+
# envs=[],
|
60 |
+
# exclude_envs=[],
|
61 |
# )
|
62 |
args = parser.parse_args()
|
63 |
print(args)
|
|
|
76 |
|
77 |
runs_paths_by_group = defaultdict(list)
|
78 |
for r in runs:
|
79 |
+
if r.state != "finished":
|
80 |
+
continue
|
81 |
algo = r.config["algo"]
|
82 |
env = r.config["env"]
|
83 |
if args.envs and env not in args.envs:
|
84 |
continue
|
85 |
+
if args.exclude_envs and env in args.exclude_envs:
|
86 |
+
continue
|
87 |
run_group = RunGroup(algo, env)
|
88 |
runs_paths_by_group[run_group].append("/".join(r.path))
|
89 |
|
|
|
96 |
if args.huggingface_user:
|
97 |
publish_args.append("--huggingface-user")
|
98 |
publish_args.append(args.huggingface_user)
|
99 |
+
if args.virtual_display:
|
100 |
+
publish_args.append("--virtual-display")
|
101 |
subprocess.run(publish_args)
|
102 |
|
103 |
tp = ThreadPool(args.pool_size)
|
huggingface_publish.py
CHANGED
@@ -29,9 +29,11 @@ def publish(
|
|
29 |
wandb_report_url: str,
|
30 |
huggingface_user: Optional[str] = None,
|
31 |
huggingface_token: Optional[str] = None,
|
|
|
32 |
) -> None:
|
33 |
-
virtual_display
|
34 |
-
|
|
|
35 |
|
36 |
api = wandb.Api()
|
37 |
runs = [api.run(rp) for rp in wandb_run_paths]
|
@@ -178,6 +180,9 @@ if __name__ == "__main__":
|
|
178 |
help="Huggingface user or team to upload model cards",
|
179 |
default=None,
|
180 |
)
|
|
|
|
|
|
|
181 |
args = parser.parse_args()
|
182 |
print(args)
|
183 |
publish(**vars(args))
|
|
|
29 |
wandb_report_url: str,
|
30 |
huggingface_user: Optional[str] = None,
|
31 |
huggingface_token: Optional[str] = None,
|
32 |
+
virtual_display: bool = False,
|
33 |
) -> None:
|
34 |
+
if virtual_display:
|
35 |
+
display = Display(visible=False, size=(1400, 900))
|
36 |
+
display.start()
|
37 |
|
38 |
api = wandb.Api()
|
39 |
runs = [api.run(rp) for rp in wandb_run_paths]
|
|
|
180 |
help="Huggingface user or team to upload model cards",
|
181 |
default=None,
|
182 |
)
|
183 |
+
parser.add_argument(
|
184 |
+
"--virtual-display", action="store_true", help="Use headless virtual display"
|
185 |
+
)
|
186 |
args = parser.parse_args()
|
187 |
print(args)
|
188 |
publish(**vars(args))
|
replay.meta.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 5.1.2 Copyright (c) 2000-2022 the FFmpeg developers\\nbuilt with clang version 14.0.6\\nconfiguration: --prefix=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl --cc=arm64-apple-darwin20.0.0-clang --cxx=arm64-apple-darwin20.0.0-clang++ --nm=arm64-apple-darwin20.0.0-nm --ar=arm64-apple-darwin20.0.0-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libfontconfig --enable-libopenh264 --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/x86_64-apple-darwin13.4.0-clang --enable-neon --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-pthreads --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-pic --enable-shared --disable-static --enable-version3 --enable-zlib --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/pkg-config\\nlibavutil 57. 28.100 / 57. 28.100\\nlibavcodec 59. 37.100 / 59. 37.100\\nlibavformat 59. 27.100 / 59. 27.100\\nlibavdevice 59. 7.100 / 59. 7.100\\nlibavfilter 8. 44.100 / 8. 44.100\\nlibswscale 6. 7.100 / 6. 7.100\\nlibswresample 4. 7.100 / 4. 7.100\\nlibpostproc 56. 6.100 / 56. 6.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "160x210", "-pix_fmt", "rgb24", "-framerate", "30", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "30", "/var/folders/9g/my5557_91xddp6lx00nkzly80000gn/T/
|
|
|
1 |
+
{"content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 5.1.2 Copyright (c) 2000-2022 the FFmpeg developers\\nbuilt with clang version 14.0.6\\nconfiguration: --prefix=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl --cc=arm64-apple-darwin20.0.0-clang --cxx=arm64-apple-darwin20.0.0-clang++ --nm=arm64-apple-darwin20.0.0-nm --ar=arm64-apple-darwin20.0.0-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libfontconfig --enable-libopenh264 --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/x86_64-apple-darwin13.4.0-clang --enable-neon --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-pthreads --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-pic --enable-shared --disable-static --enable-version3 --enable-zlib --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/pkg-config\\nlibavutil 57. 28.100 / 57. 28.100\\nlibavcodec 59. 37.100 / 59. 37.100\\nlibavformat 59. 27.100 / 59. 27.100\\nlibavdevice 59. 7.100 / 59. 7.100\\nlibavfilter 8. 44.100 / 8. 44.100\\nlibswscale 6. 7.100 / 6. 7.100\\nlibswresample 4. 7.100 / 4. 7.100\\nlibpostproc 56. 6.100 / 56. 
6.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "160x210", "-pix_fmt", "rgb24", "-framerate", "30", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "30", "/var/folders/9g/my5557_91xddp6lx00nkzly80000gn/T/tmp35vo3ma2/ppo-impala-QbertNoFrameskip-v4/replay.mp4"]}, "episode": {"r": 14875.0, "l": 7868, "t": 30.871071}}
|
replay.mp4
CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
|
|