sgoodfriend committed
Commit 81552e0
Parent(s): b05c680

PPO playing BreakoutNoFrameskip-v4 from https://github.com/sgoodfriend/rl-algo-impls/tree/e47a44c4d891f48885af0b1605b30d19fc67b5af

Files changed:
- README.md +12 -12
- compare_runs.py +11 -7
- huggingface_publish.py +8 -7
- replay.meta.json +1 -1
- replay.mp4 +0 -0
- saved_models/ppo-impala-BreakoutNoFrameskip-v4-S3-best/model.pth +3 -0
README.md
CHANGED
@@ -1,7 +1,7 @@
 ---
 library_name: rl-algo-impls
 tags:
--
+- BreakoutNoFrameskip-v4
 - ppo
 - deep-reinforcement-learning
 - reinforcement-learning
@@ -10,18 +10,18 @@ model-index:
   results:
   - metrics:
     - type: mean_reward
-      value:
+      value: 516.88 +/- 155.01
       name: mean_reward
     task:
       type: reinforcement-learning
       name: reinforcement-learning
     dataset:
-      name:
-      type:
+      name: BreakoutNoFrameskip-v4
+      type: BreakoutNoFrameskip-v4
 ---
-# **PPO** Agent playing **
+# **PPO** Agent playing **BreakoutNoFrameskip-v4**
 
-This is a trained model of a **PPO** agent playing **
+This is a trained model of a **PPO** agent playing **BreakoutNoFrameskip-v4** using the [/sgoodfriend/rl-algo-impls](https://github.com/sgoodfriend/rl-algo-impls) repo.
 
 All models trained at this commit can be found at https://api.wandb.ai/links/sgoodfriend/v4wd7cp5.
 
@@ -31,9 +31,9 @@ This model was trained from 3 trainings of **PPO** agents using different initia
 
 | algo | env | seed | reward_mean | reward_std | eval_episodes | best | wandb_url |
 |:-------|:-----------------------|-------:|--------------:|-------------:|----------------:|:-------|:-----------------------------------------------------------------------------|
-| ppo | BreakoutNoFrameskip-v4 | 1 |
-| ppo | BreakoutNoFrameskip-v4 | 2 |
-| ppo | BreakoutNoFrameskip-v4 | 3 |
+| ppo | BreakoutNoFrameskip-v4 | 1 | 502.562 | 161.406 | 16 |  | [wandb](https://wandb.ai/sgoodfriend/rl-algo-impls-benchmarks/runs/ntpe1h0y) |
+| ppo | BreakoutNoFrameskip-v4 | 2 | 426.562 | 85.8509 | 16 |  | [wandb](https://wandb.ai/sgoodfriend/rl-algo-impls-benchmarks/runs/olgzm7mt) |
+| ppo | BreakoutNoFrameskip-v4 | 3 | 516.875 | 155.012 | 16 | * | [wandb](https://wandb.ai/sgoodfriend/rl-algo-impls-benchmarks/runs/kxf84p5u) |
 
 
 ### Prerequisites: Weights & Biases (WandB)
@@ -56,7 +56,7 @@ results. You might need to checkout the commit the agent was trained on:
 [e47a44c](https://github.com/sgoodfriend/rl-algo-impls/tree/e47a44c4d891f48885af0b1605b30d19fc67b5af).
 ```
 # Downloads the model, sets hyperparameters, and runs agent for 3 episodes
-python enjoy.py --wandb-run-path=sgoodfriend/rl-algo-impls-benchmarks/
+python enjoy.py --wandb-run-path=sgoodfriend/rl-algo-impls-benchmarks/kxf84p5u
 ```
 
 Setup hasn't been completely worked out yet, so you might be best served by using Google
@@ -72,7 +72,7 @@ commit the agent was trained on: [e47a44c](https://github.com/sgoodfriend/rl-alg
 training is deterministic, different hardware will give different results.
 
 ```
-python train.py --algo ppo --env
+python train.py --algo ppo --env BreakoutNoFrameskip-v4 --seed 3
 ```
 
 Setup hasn't been completely worked out yet, so you might be best served by using Google
@@ -133,7 +133,7 @@ policy_hyperparams:
   cnn_layers_init_orthogonal: false
   cnn_style: impala
   init_layers_orthogonal: true
-seed:
+seed: 3
 use_deterministic_algorithms: true
 wandb_entity: null
 wandb_project_name: rl-algo-impls-benchmarks
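The model card's headline metric comes from the starred best run in the table above (seed 3). A quick sanity check, not part of the repo, that the card's `value` is just that row rounded to two decimals:

```
# Not repo code: verify the card's "value: 516.88 +/- 155.01" against
# the seed-3 table row (516.875 mean, 155.012 std over 16 episodes).
reward_mean, reward_std = 516.875, 155.012
print(f"{round(reward_mean, 2)} +/- {round(reward_std, 2)}")  # 516.88 +/- 155.01
```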
compare_runs.py
CHANGED
@@ -143,16 +143,16 @@ if __name__ == "__main__":
         help="WandB tag for experiment commit (i.e. benchmark_5540e1f)",
     )
     parser.add_argument(
-        "--
+        "--exclude-envs",
         type=str,
         nargs="*",
         help="Environments to exclude from comparison",
     )
     # parser.set_defaults(
-    #     wandb_hostname_tag=["
-    #     wandb_control_tag=["
-    #     wandb_experiment_tag=["
-    #     exclude_envs=[
+    #     wandb_hostname_tag=["host_150-230-44-105", "host_155-248-214-128"],
+    #     wandb_control_tag=["benchmark_fbc943f"],
+    #     wandb_experiment_tag=["benchmark_f59bf74"],
+    #     exclude_envs=[],
     # )
     args = parser.parse_args()
     print(args)
@@ -166,15 +166,19 @@ if __name__ == "__main__":
     runs_by_run_group: Dict[RunGroup, RunGroupRuns] = {}
     wandb_hostname_tags = set(args.wandb_hostname_tag)
     for r in all_runs:
+        if r.state != "finished":
+            continue
         wandb_tags = set(r.config.get("wandb_tags", []))
         if not wandb_tags or not wandb_hostname_tags & wandb_tags:
             continue
-        rg = RunGroup(r.config["algo"], r.config["env"])
+        rg = RunGroup(r.config["algo"], r.config.get("env_id") or r.config["env"])
         if args.exclude_envs and rg.env_id in args.exclude_envs:
             continue
         if rg not in runs_by_run_group:
             runs_by_run_group[rg] = RunGroupRuns(
-                rg,
+                rg,
+                args.wandb_control_tag,
+                args.wandb_experiment_tag,
             )
         runs_by_run_group[rg].add_run(r)
     df = RunGroupRuns.data_frame(runs_by_run_group.values()).round(decimals=2)
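Both hunks in this file lean on the same backward-compatible lookup: newer runs record the environment under the `env_id` config key, older runs only under `env`. A minimal sketch of the pattern, with a helper name that is mine rather than the repo's:

```
# Hypothetical helper illustrating the fallback used above: dict.get
# returns None when "env_id" is absent, and `or` then falls through to
# the legacy "env" key logged by older commits.
def resolve_env_id(config: dict) -> str:
    return config.get("env_id") or config["env"]

assert resolve_env_id({"env_id": "BreakoutNoFrameskip-v4"}) == "BreakoutNoFrameskip-v4"
assert resolve_env_id({"env": "BreakoutNoFrameskip-v4"}) == "BreakoutNoFrameskip-v4"
```

The `if r.state != "finished": continue` guard added to the loop also keeps crashed or still-running WandB runs out of the comparison table.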
huggingface_publish.py
CHANGED
@@ -38,12 +38,12 @@ def publish(
     api = wandb.Api()
     runs = [api.run(rp) for rp in wandb_run_paths]
     algo = runs[0].config["algo"]
-
+    hyperparam_id = runs[0].config["env"]
     evaluations = [
         evaluate_model(
             EvalArgs(
                 algo,
-
+                hyperparam_id,
                 seed=r.config.get("seed", None),
                 render=False,
                 best=True,
@@ -80,9 +80,10 @@ def publish(
 
     github_url = "https://github.com/sgoodfriend/rl-algo-impls"
     commit_hash = run_metadata.get("git", {}).get("commit", None)
+    env_id = runs[0].config.get("env_id") or runs[0].config["env"]
     card_text = model_card_text(
         algo,
-
+        env_id,
         github_url,
         commit_hash,
         wandb_report_url,
@@ -97,7 +98,7 @@ def publish(
     metadata = {
         "library_name": "rl-algo-impls",
         "tags": [
-
+            env_id,
             algo,
             "deep-reinforcement-learning",
             "reinforcement-learning",
@@ -119,8 +120,8 @@ def publish(
                             "name": "reinforcement-learning",
                         },
                         "dataset": {
-                            "name":
-                            "type":
+                            "name": env_id,
+                            "type": env_id,
                         },
                     }
                 ],
@@ -159,7 +160,7 @@ def publish(
         repo_id=huggingface_repo,
         folder_path=repo_dir_path,
         path_in_repo="",
-        commit_message=f"{algo.upper()} playing {
+        commit_message=f"{algo.upper()} playing {env_id} from {github_url}/tree/{commit_hash}",
         token=huggingface_token,
     )
     print(f"Pushed model to the hub: {repo_url}")
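The new `commit_message` f-string is what generated the title of this very commit. Rendering the template with this commit's own values (a sketch, not repo code):

```
# Not repo code: substituting this commit's values into the
# commit_message template reproduces the title at the top of this page.
algo = "ppo"
env_id = "BreakoutNoFrameskip-v4"
github_url = "https://github.com/sgoodfriend/rl-algo-impls"
commit_hash = "e47a44c4d891f48885af0b1605b30d19fc67b5af"
print(f"{algo.upper()} playing {env_id} from {github_url}/tree/{commit_hash}")
# PPO playing BreakoutNoFrameskip-v4 from https://github.com/sgoodfriend/rl-algo-impls/tree/e47a44c4d891f48885af0b1605b30d19fc67b5af
```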
replay.meta.json
CHANGED
@@ -1 +1 @@
-{"content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 5.1.2 Copyright (c) 2000-2022 the FFmpeg developers\\nbuilt with clang version 14.0.6\\nconfiguration: --prefix=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl --cc=arm64-apple-darwin20.0.0-clang --cxx=arm64-apple-darwin20.0.0-clang++ --nm=arm64-apple-darwin20.0.0-nm --ar=arm64-apple-darwin20.0.0-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libfontconfig --enable-libopenh264 --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/x86_64-apple-darwin13.4.0-clang --enable-neon --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-pthreads --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-pic --enable-shared --disable-static --enable-version3 --enable-zlib --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/pkg-config\\nlibavutil 57. 28.100 / 57. 28.100\\nlibavcodec 59. 37.100 / 59. 37.100\\nlibavformat 59. 27.100 / 59. 27.100\\nlibavdevice 59. 7.100 / 59. 7.100\\nlibavfilter 8. 44.100 / 8. 44.100\\nlibswscale 6. 7.100 / 6. 7.100\\nlibswresample 4. 7.100 / 4. 7.100\\nlibpostproc 56. 6.100 / 56. 6.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "160x210", "-pix_fmt", "rgb24", "-framerate", "30", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "30", "/var/folders/9g/my5557_91xddp6lx00nkzly80000gn/T/
+{"content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 5.1.2 Copyright (c) 2000-2022 the FFmpeg developers\\nbuilt with clang version 14.0.6\\nconfiguration: --prefix=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl --cc=arm64-apple-darwin20.0.0-clang --cxx=arm64-apple-darwin20.0.0-clang++ --nm=arm64-apple-darwin20.0.0-nm --ar=arm64-apple-darwin20.0.0-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libfontconfig --enable-libopenh264 --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/x86_64-apple-darwin13.4.0-clang --enable-neon --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-pthreads --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-pic --enable-shared --disable-static --enable-version3 --enable-zlib --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/pkg-config\\nlibavutil 57. 28.100 / 57. 28.100\\nlibavcodec 59. 37.100 / 59. 37.100\\nlibavformat 59. 27.100 / 59. 27.100\\nlibavdevice 59. 7.100 / 59. 7.100\\nlibavfilter 8. 44.100 / 8. 44.100\\nlibswscale 6. 7.100 / 6. 7.100\\nlibswresample 4. 7.100 / 4. 7.100\\nlibpostproc 56. 6.100 / 56. 6.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "160x210", "-pix_fmt", "rgb24", "-framerate", "30", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "30", "/var/folders/9g/my5557_91xddp6lx00nkzly80000gn/T/tmpvd_d2os3/ppo-impala-BreakoutNoFrameskip-v4/replay.mp4"]}, "episode": {"r": 421.0, "l": 9640, "t": 32.27043}}
replay.mp4
CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
saved_models/ppo-impala-BreakoutNoFrameskip-v4-S3-best/model.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d54070e19010576c52576c90332a2427c3a23e91e23db7e2e6838c96c4ba45ed
+size 4376749
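The three added lines are a Git LFS pointer: the repository tracks only this pointer, while the 4,376,749-byte weights live in LFS storage. One way to fetch the resolved file is via `huggingface_hub` (a sketch; the `repo_id` is a guess, since this page doesn't name the Hub repo):

```
# Sketch using huggingface_hub; repo_id is hypothetical. hf_hub_download
# resolves the LFS pointer and returns a local path to the real file.
from huggingface_hub import hf_hub_download

model_path = hf_hub_download(
    repo_id="sgoodfriend/ppo-BreakoutNoFrameskip-v4",  # hypothetical
    filename="saved_models/ppo-impala-BreakoutNoFrameskip-v4-S3-best/model.pth",
)
print(model_path)
```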