Fix leaderboard errors

- src/display/utils.py +40 -26
- src/envs.py +2 -2
- src/submission/check_validity.py +0 -2
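In short: src/display/utils.py rewrites the leaderboard column definitions so every `ColumnContent` default is wrapped in `field(default_factory=...)` instead of being assigned as a bare dataclass-instance default, a pattern Python 3.11+ rejects at import time and presumably the error being fixed, and adds `@staticmethod` to `Precision.from_str`. src/envs.py points the Space at the `stacklok` org and its `secure-llm-leaderboard` repo, and src/submission/check_validity.py drops two unused imports.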
src/display/utils.py
CHANGED

```diff
@@ -1,5 +1,6 @@
-from dataclasses import dataclass, make_dataclass
+from dataclasses import dataclass, make_dataclass, field
 from enum import Enum
+from typing import List
 
 import pandas as pd
 
@@ -8,7 +9,6 @@ from src.about import Tasks
 def fields(raw_class):
     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
 
-
 @dataclass
 class ColumnContent:
     name: str
@@ -18,27 +18,41 @@ class ColumnContent:
     never_hidden: bool = False
 
 ## Leaderboard columns
-
-
-
-auto_eval_column_dict
-#
-
+def create_column_field(name: str, type: str, displayed_by_default: bool, hidden: bool = False, never_hidden: bool = False):
+    return field(default_factory=lambda: ColumnContent(name, type, displayed_by_default, hidden, never_hidden))
+
+auto_eval_column_dict = [
+    # Init
+    ("model_type_symbol", ColumnContent, create_column_field("T", "str", True, never_hidden=True)),
+    ("model", ColumnContent, create_column_field("Model", "markdown", True, never_hidden=True)),
+    # Scores
+    ("average", ColumnContent, create_column_field("Security Score ⬆️", "number", True))
+]
+
+# Add task-specific columns
 for task in Tasks:
     if task.name == "safetensors":
-        auto_eval_column_dict.append(
+        auto_eval_column_dict.append((task.name, ColumnContent, create_column_field("Safetensors ✓", "bool", True)))
     else:
-        auto_eval_column_dict.append(
+        auto_eval_column_dict.append((task.name, ColumnContent, create_column_field(task.value.col_name, "number", True)))
+
 # Model information
-
-
-
-
-
-
-
-
-
+model_info_columns = [
+    ("model_type", "Type", "str", False),
+    ("architecture", "Architecture", "str", False),
+    ("weight_type", "Weight Format", "str", True),
+    ("precision", "Precision", "str", False),
+    ("license", "Hub License", "str", True),
+    ("params", "#Params (B)", "number", False),
+    ("likes", "Hub ❤️", "number", False),
+    ("still_on_hub", "Available on Hub", "bool", False),
+    ("revision", "Model SHA", "str", False)
+]
+
+for field_name, display_name, field_type, displayed_by_default in model_info_columns:
+    auto_eval_column_dict.append(
+        (field_name, ColumnContent, create_column_field(display_name, field_type, displayed_by_default))
+    )
 
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
@@ -46,12 +60,12 @@ AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=
 ## For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn: # Queue column
-    model = ColumnContent("model", "markdown", True)
-    revision = ColumnContent("revision", "str", True)
-    private = ColumnContent("private", "bool", True)
-    precision = ColumnContent("precision", "str", True)
-    weight_type = ColumnContent("weight_type", "str", True)
-    status = ColumnContent("status", "str", True)
+    model: ColumnContent = field(default_factory=lambda: ColumnContent("model", "markdown", True))
+    revision: ColumnContent = field(default_factory=lambda: ColumnContent("revision", "str", True))
+    private: ColumnContent = field(default_factory=lambda: ColumnContent("private", "bool", True))
+    precision: ColumnContent = field(default_factory=lambda: ColumnContent("precision", "str", True))
+    weight_type: ColumnContent = field(default_factory=lambda: ColumnContent("weight_type", "str", True))
+    status: ColumnContent = field(default_factory=lambda: ColumnContent("status", "str", True))
 
 ## All the model information that we might need
 @dataclass
@@ -60,7 +74,6 @@ class ModelDetails:
     display_name: str = ""
     symbol: str = "" # emoji
 
-
 class ModelType(Enum):
     PT = ModelDetails(name="pretrained", symbol="🟢")
     FT = ModelDetails(name="fine-tuned", symbol="🔶")
@@ -93,6 +106,7 @@ class Precision(Enum):
     bfloat16 = ModelDetails("bfloat16")
     Unknown = ModelDetails("?")
 
+    @staticmethod
     def from_str(precision):
         if precision in ["torch.float16", "float16"]:
             return Precision.float16
```
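Why the `field(default_factory=...)` wrapping matters: `ColumnContent` is a non-frozen dataclass, so its instances define `__eq__` without `__hash__` and are unhashable, and starting with Python 3.11 `dataclasses` rejects any unhashable class-attribute default, no longer just `list`, `dict`, and `set`. Below is a minimal sketch of the failure and the fix; the trimmed `ColumnContent` and the `Broken`/`Fixed` names are illustrative, not from the repo:

```python
from dataclasses import dataclass, field


@dataclass
class ColumnContent:  # trimmed-down stand-in for the repo's class
    name: str
    type: str
    displayed_by_default: bool


# On Python 3.11+ this raises at class-definition time:
#   ValueError: mutable default <class 'ColumnContent'> for field model
#   is not allowed: use default_factory
try:
    @dataclass(frozen=True)
    class Broken:
        model: ColumnContent = ColumnContent("model", "markdown", True)
except ValueError as err:
    print(err)


# default_factory defers construction to instantiation time, so every
# instance gets its own ColumnContent and no shared default object is
# stored on the class.
@dataclass(frozen=True)
class Fixed:
    model: ColumnContent = field(
        default_factory=lambda: ColumnContent("model", "markdown", True)
    )


print(Fixed().model.name)  # -> model
```

The `@staticmethod` added to `Precision.from_str` is a smaller fix in the same spirit: it marks the converter as a class-level helper, so `Precision.from_str("float16")` works uniformly whether called on the class or on a member, instead of relying on plain-function access through the class.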
src/envs.py
CHANGED

```diff
@@ -7,10 +7,10 @@ from huggingface_hub import HfApi
 TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
 
 # Change these to your organization name
-OWNER = "
+OWNER = "stacklok" # Create this organization on HuggingFace
 # ----------------------------------
 
-REPO_ID = f"{OWNER}/leaderboard"
+REPO_ID = f"{OWNER}/secure-llm-leaderboard"
 QUEUE_REPO = f"{OWNER}/requests"
 RESULTS_REPO = f"{OWNER}/results"
 
```
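For reference, these constants feed the Hub synchronization elsewhere in the leaderboard template. A minimal sketch of the usual pattern, assuming the template's standard `snapshot_download` flow (the local directory is illustrative):

```python
import os

from huggingface_hub import HfApi, snapshot_download

OWNER = "stacklok"
QUEUE_REPO = f"{OWNER}/requests"
TOKEN = os.environ.get("HF_TOKEN")  # read/write token for the org

API = HfApi(token=TOKEN)

# The requests/results repos are dataset repos on the Hub, hence
# repo_type="dataset"; a mismatched repo_type is a common source of
# "repository not found" errors.
snapshot_download(
    repo_id=QUEUE_REPO,
    local_dir="./eval-queue",  # illustrative path
    repo_type="dataset",
    token=TOKEN,
)
```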
src/submission/check_validity.py
CHANGED

```diff
@@ -1,8 +1,6 @@
 import json
 import os
-import re
 from collections import defaultdict
-from datetime import datetime, timedelta, timezone
 
 import huggingface_hub
 from huggingface_hub import ModelCard
```
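The two dropped imports (`re` and the `datetime` trio) appear to have been dead code; nothing in the surrounding hunk references them. Removing unused imports is behavior-neutral, and a linter pass such as `python -m pyflakes src/` or `ruff check src/` flags exactly this class of leftover.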