Spaces:
Runtime error
Runtime error
Fix for new tasks
Browse files
app.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import os
|
|
|
2 |
from pathlib import Path
|
|
|
3 |
|
4 |
import pandas as pd
|
5 |
import requests
|
@@ -14,6 +16,9 @@ auth_token = os.getenv("HF_HUB_TOKEN")
|
|
14 |
header = {"Authorization": "Bearer " + auth_token}
|
15 |
|
16 |
TASKS = get_dataset_config_names("ought/raft")
|
|
|
|
|
|
|
17 |
# Split and capitalize the task names, e.g. banking_77 => Banking 77
|
18 |
FORMATTED_TASK_NAMES = [" ".join(t.capitalize() for t in task.split("_")) for task in TASKS]
|
19 |
|
@@ -36,15 +41,16 @@ def download_submissions():
|
|
36 |
tags = extract_tags(dataset)
|
37 |
if tags.get("benchmark") == "ought/raft" and tags.get("type") == "evaluation":
|
38 |
submissions.append(dataset)
|
|
|
39 |
return submissions
|
40 |
|
41 |
|
42 |
def format_submissions(submissions):
|
43 |
-
submission_data = {**{"Submission": []}, **{t: [] for t in TASKS}}
|
44 |
|
45 |
# TODO(lewtun): delete / filter all the junk repos from development
|
46 |
# The following picks the latest submissions which adhere to the model card schema
|
47 |
-
for submission in submissions[-
|
48 |
submission_id = submission["id"]
|
49 |
response = requests.get(
|
50 |
f"http://huggingface.co/api/datasets/{submission_id}?full=true",
|
@@ -54,15 +60,22 @@ def format_submissions(submissions):
|
|
54 |
card_data = data["card_data"]
|
55 |
submission_name = card_data["submission_dataset"]
|
56 |
submission_data["Submission"].append(submission_name)
|
|
|
|
|
|
|
|
|
57 |
|
58 |
for task in card_data["results"]:
|
59 |
task_data = task["task"]
|
60 |
task_name = task_data["name"]
|
|
|
|
|
|
|
61 |
score = task_data["metrics"][0]["value"]
|
62 |
submission_data[task_name].append(score)
|
63 |
|
64 |
df = pd.DataFrame(submission_data)
|
65 |
-
df.insert(
|
66 |
df = df.copy().sort_values("Overall", ascending=False).reset_index().rename(columns={"index": "Rank"})
|
67 |
df.rename(columns={k: v for k, v in zip(TASKS, FORMATTED_TASK_NAMES)}, inplace=True)
|
68 |
return df
|
|
|
1 |
import os
|
2 |
+
from datetime import datetime
|
3 |
from pathlib import Path
|
4 |
+
from re import sub
|
5 |
|
6 |
import pandas as pd
|
7 |
import requests
|
|
|
16 |
header = {"Authorization": "Bearer " + auth_token}
|
17 |
|
18 |
TASKS = get_dataset_config_names("ought/raft")
|
19 |
+
# TODO(lewtun): Evaluate with new subtasks and remove this filter
|
20 |
+
TASKS_TO_EXCLUDE = ["gpai_initiatives", "ade_corpus_v2", "tweet_eval_hate"]
|
21 |
+
TASKS = [t for t in TASKS if t not in TASKS_TO_EXCLUDE]
|
22 |
# Split and capitalize the task names, e.g. banking_77 => Banking 77
|
23 |
FORMATTED_TASK_NAMES = [" ".join(t.capitalize() for t in task.split("_")) for task in TASKS]
|
24 |
|
|
|
41 |
tags = extract_tags(dataset)
|
42 |
if tags.get("benchmark") == "ought/raft" and tags.get("type") == "evaluation":
|
43 |
submissions.append(dataset)
|
44 |
+
submissions = sorted(submissions, key = lambda x: int(x["id"].split("-")[-1]))
|
45 |
return submissions
|
46 |
|
47 |
|
48 |
def format_submissions(submissions):
|
49 |
+
submission_data = {**{"Submission": []}, **{"Date": []}, **{t: [] for t in TASKS}}
|
50 |
|
51 |
# TODO(lewtun): delete / filter all the junk repos from development
|
52 |
# The following picks the latest submissions which adhere to the model card schema
|
53 |
+
for submission in submissions[-1:]:
|
54 |
submission_id = submission["id"]
|
55 |
response = requests.get(
|
56 |
f"http://huggingface.co/api/datasets/{submission_id}?full=true",
|
|
|
60 |
card_data = data["card_data"]
|
61 |
submission_name = card_data["submission_dataset"]
|
62 |
submission_data["Submission"].append(submission_name)
|
63 |
+
submission_id = card_data["submission_id"]
|
64 |
+
timestamp = submission_id.split("-")[-1]
|
65 |
+
timestamp = pd.to_datetime(int(timestamp))
|
66 |
+
submission_data["Date"].append(datetime.date(timestamp))
|
67 |
|
68 |
for task in card_data["results"]:
|
69 |
task_data = task["task"]
|
70 |
task_name = task_data["name"]
|
71 |
+
# TODO(lewtun): Evaluate with new subtasks and remove this filter
|
72 |
+
if task_name in TASKS_TO_EXCLUDE:
|
73 |
+
continue
|
74 |
score = task_data["metrics"][0]["value"]
|
75 |
submission_data[task_name].append(score)
|
76 |
|
77 |
df = pd.DataFrame(submission_data)
|
78 |
+
df.insert(2, "Overall", df[TASKS].mean(axis=1))
|
79 |
df = df.copy().sort_values("Overall", ascending=False).reset_index().rename(columns={"index": "Rank"})
|
80 |
df.rename(columns={k: v for k, v in zip(TASKS, FORMATTED_TASK_NAMES)}, inplace=True)
|
81 |
return df
|