|
import gradio as gr |
|
from huggingface_hub import HfApi, hf_hub_download |
|
from huggingface_hub.repocard import metadata_load |
|
|
|
import pandas as pd |
|
|
|
from utils import * |
|
|
|
# Module-level Hugging Face Hub client.
# NOTE(review): no function visible in this file reads this name
# (get_user_models creates its own client) - confirm whether it is still needed.
api = HfApi()
|
|
|
def get_user_models(hf_username, env_tag, lib_tag):
    """
    List the Reinforcement Learning models
    from user given environment and lib
    :param hf_username: User HF username
    :param env_tag: Environment tag
    :param lib_tag: Library tag
    :return: list of the matching model ids; empty when no username is given
    """
    # Surrounding whitespace is an easy copy/paste artefact in a username box.
    if isinstance(hf_username, str):
        hf_username = hf_username.strip()

    # An empty author cannot identify a user (and may leave the Hub query
    # unrestricted by author - TODO confirm), so skip the request entirely.
    if not hf_username:
        return []

    hub_client = HfApi()
    models = hub_client.list_models(
        author=hf_username,
        filter=["reinforcement-learning", env_tag, lib_tag],
    )
    return [model.modelId for model in models]
|
|
|
|
|
def get_metadata(model_id):
    """
    Get model metadata (contains evaluation data)
    :param model_id: id of the model repo whose README.md is read
    :return: the result of metadata_load on the downloaded README.md,
        or None when the download fails with an HTTP error
    """
    # Imported here because the except clause below needs the name and no
    # top-level import of `requests` is visible in this file; without it a
    # failed download would surface as a NameError instead of returning None.
    import requests

    try:
        readme_path = hf_hub_download(model_id, filename="README.md")
        return metadata_load(readme_path)
    except requests.exceptions.HTTPError:
        # Presumably the repo or its README.md could not be fetched; the caller
        # treats None as "no metadata available".
        return None
|
|
|
|
|
def parse_metrics_accuracy(meta):
    """
    Get model results and parse it
    :param meta: model metadata
    :return: the "value" of the first metric of the first result of the first
        "model-index" entry, or None when the metadata has no such entry
    """
    if not meta or "model-index" not in meta:
        return None

    # Model cards are user-written: "model-index" may be present but empty or
    # shaped differently. Treat any missing level as "no score" rather than
    # raising.
    try:
        first_result = meta["model-index"][0]["results"][0]
        return first_result["metrics"][0]["value"]
    except (KeyError, IndexError, TypeError):
        return None
|
|
|
|
|
def parse_rewards(accuracy):
    """
    Parse mean_reward and std_reward
    :param accuracy: model results, either "mean +/- std", a bare "mean",
        a number, or None
    :return: (mean_reward, std_reward) as floats; (-1000.0, -1000.0) when the
        result is missing or is not numeric
    """
    # Both fallbacks are -1000, so mean_reward - std_reward evaluates to 0 for
    # a model without a usable score.
    default_reward = -1000
    default_std = -1000
    fallback = (float(default_reward), float(default_std))

    if accuracy is None:
        return fallback

    # str.split always yields at least one piece, so only the "mean +/- std"
    # and the bare "mean" shapes have to be handled.
    parsed = str(accuracy).split(" +/- ")
    try:
        mean_reward = float(parsed[0])
        std_reward = float(parsed[1]) if len(parsed) > 1 else 0.0
    except ValueError:
        # A non-numeric score in one model card should not abort the whole
        # progress check; handle it like a missing score.
        return fallback

    return mean_reward, std_reward
|
|
|
def calculate_best_result(user_model_ids):
    """
    Find the best scoring model of a unit
    score = mean_reward - std_reward
    :param user_model_ids: RL models of a user
    :return: (best score, id of the model that reached it); (-100, "") when
        no model scores above -100
    """
    top_score, top_model_id = -100, ""

    for candidate_id in user_model_ids:
        card_meta = get_metadata(candidate_id)
        if card_meta is not None:
            reward, spread = parse_rewards(parse_metrics_accuracy(card_meta))
            score = reward - spread
            # Strict comparison: on a tie the earlier model is kept.
            if score > top_score:
                top_score, top_model_id = score, candidate_id

    return top_score, top_model_id
|
|
|
def check_if_passed(model):
    """
    Flag the unit as passed when its best result reaches the baseline
    (best_result >= min_result)
    :param model: unit dict with "best_result" and "min_result"; "passed_" is
        set to True in place on success and left untouched otherwise
    """
    reached_baseline = model["best_result"] >= model["min_result"]
    if reached_baseline:
        model["passed_"] = True
|
|
|
def certification(hf_username):
    """
    Build the progress table of a user
    For every unit: list the user's models for the unit's env and library,
    keep the best result, record whether it reaches min_result, and store the
    display values produced by make_clickable_model and pass_emoji (helpers
    not defined in this file, presumably provided by utils).
    The full list of unit dicts is printed before the table is built.
    :param hf_username: User HF username
    :return: DataFrame with the columns passed, unit, env, min_result,
        best_result, best_model_id
    """
    # (unit, env, library, min_result)
    unit_specs = (
        ("Unit 1", "LunarLander-v2", "stable-baselines3", 200),
        ("Unit 2", "Taxi-v3", "q-learning", 4),
        ("Unit 3", "SpaceInvadersNoFrameskip-v4", "stable-baselines3", 200),
        ("Unit 4", "CartPole-v1", "reinforce", 350),
        ("Unit 4", "Pixelcopter-PLE-v0", "reinforce", 5),
        ("Unit 5", "ML-Agents-SnowballTarget", "ml-agents", -100),
        ("Unit 5", "ML-Agents-Pyramids", "ml-agents", -100),
        ("Unit 6", "AntBulletEnv-v0", "stable-baselines3", 650),
        ("Unit 6", "PandaReachDense-v2", "stable-baselines3", -3.5),
        ("Unit 7", "ML-Agents-SoccerTwos", "ml-agents", -100),
        ("Unit 8 PI", "GodotRL-JumperHard", "cleanrl", -100),
        ("Unit 8 PII", "Vizdoom-Battle", "cleanrl", -100),
    )

    # Every unit starts with the same placeholder result fields.
    results_certification = [
        {
            "unit": unit_name,
            "env": env_tag,
            "library": lib_tag,
            "min_result": baseline,
            "best_result": 0,
            "best_model_id": "",
            "passed_": False,
        }
        for unit_name, env_tag, lib_tag, baseline in unit_specs
    ]

    for row in results_certification:
        candidate_ids = get_user_models(hf_username, row["env"], row["library"])
        top_score, top_model_id = calculate_best_result(candidate_ids)

        row["best_result"] = top_score
        row["best_model_id"] = make_clickable_model(top_model_id)

        # "passed_" holds the boolean, "passed" the value shown in the table.
        check_if_passed(row)
        row["passed"] = pass_emoji(row["passed_"])

    print(results_certification)

    shown_columns = ["passed", "unit", "env", "min_result", "best_result", "best_model_id"]
    return pd.DataFrame(results_certification)[shown_columns]
|
|
|
|
|
# Gradio UI: a username box, a button, and the progress table filled by
# certification() when the button is clicked.
with gr.Blocks() as demo:
    # NOTE(review): the "π" characters in the title look like a mis-decoded
    # emoji; the intended character cannot be recovered from this file, so the
    # title text is left unchanged.
    gr.Markdown("""
# π Check your progress in the Deep Reinforcement Learning Course π
You can check your progress here.

- To get a certificate of completion, you must **pass 80% of the assignments before the end of April 2023**.
- To get an honors certificate, you must **pass 100% of the assignments before the end of April 2023**.

To pass an assignment your model result (mean_reward - std_reward) must be >= min_result

**When min_result = -100 it means that you just need to push a model to pass this hands-on. No need to reach a certain result.**

Just type your Hugging Face Username 🤗 (in my case ThomasSimonini)
""")

    hf_username = gr.Textbox(placeholder="ThomasSimonini", label="Your Hugging Face Username")

    check_progress_button = gr.Button(value="Check my progress")
    # NOTE(review): certification(hf_username) runs while the UI is being built
    # and receives the Textbox component itself, not a username string -
    # confirm whether the table should instead start empty until the button is
    # clicked.
    output = gr.components.Dataframe(
        value=certification(hf_username),
        headers=["Pass?", "Unit", "Environment", "Baseline", "Your best result", "Your best model id"],
        # One datatype per displayed column (six columns).
        datatype=["markdown", "markdown", "markdown", "number", "number", "markdown"],
    )
    check_progress_button.click(fn=certification, inputs=hf_username, outputs=output)

demo.launch()