Update src/leaderboard/read_evals.py
Browse files
src/leaderboard/read_evals.py
CHANGED
@@ -31,6 +31,7 @@ class EvalResult:
|
|
31 |
num_params: int = 0
|
32 |
date: str = "" # submission date of request file
|
33 |
still_on_hub: bool = False
|
|
|
34 |
|
35 |
@classmethod
|
36 |
def init_from_json_file(self, json_filepath):
|
@@ -57,6 +58,12 @@ class EvalResult:
|
|
57 |
result_key = f"{org}_{model}_{precision.value.name}"
|
58 |
full_model = "/".join(org_and_model)
|
59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
still_on_hub, _, model_config = is_model_on_hub(
|
61 |
full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
|
62 |
)
|
@@ -88,7 +95,8 @@ class EvalResult:
|
|
88 |
precision=precision,
|
89 |
revision= config.get("model_sha", ""),
|
90 |
still_on_hub=still_on_hub,
|
91 |
-
architecture=architecture
|
|
|
92 |
)
|
93 |
|
94 |
def update_with_request_file(self, requests_path):
|
@@ -110,6 +118,10 @@ class EvalResult:
|
|
110 |
def to_dict(self):
|
111 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
112 |
average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
|
|
|
|
|
|
|
|
113 |
data_dict = {
|
114 |
"eval_name": self.eval_name, # not a column, just a save name,
|
115 |
AutoEvalColumn.precision.name: self.precision.value.name,
|
@@ -117,7 +129,7 @@ class EvalResult:
|
|
117 |
AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
|
118 |
AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
119 |
AutoEvalColumn.architecture.name: self.architecture,
|
120 |
-
AutoEvalColumn.model.name: make_clickable_model(
|
121 |
AutoEvalColumn.revision.name: self.revision,
|
122 |
AutoEvalColumn.average.name: average,
|
123 |
AutoEvalColumn.license.name: self.license,
|
@@ -193,4 +205,4 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
|
|
193 |
except KeyError: # not all eval values present
|
194 |
continue
|
195 |
|
196 |
-
return results
|
|
|
31 |
num_params: int = 0
|
32 |
date: str = "" # submission date of request file
|
33 |
still_on_hub: bool = False
|
34 |
+
display_model: str = "" # newly added: model name used for display
|
35 |
|
36 |
@classmethod
|
37 |
def init_from_json_file(self, json_filepath):
|
|
|
58 |
result_key = f"{org}_{model}_{precision.value.name}"
|
59 |
full_model = "/".join(org_and_model)
|
60 |
|
61 |
+
# Add mapping handling for specific model names
|
62 |
+
display_model = full_model
|
63 |
+
if full_model == "demo-leaderboard/gpt2-demo":
|
64 |
+
display_model = "deepseek-ai/DeepSeek-R1"
|
65 |
+
print(f"모델명 맵핑 적용: {full_model} -> {display_model}")
|
66 |
+
|
67 |
still_on_hub, _, model_config = is_model_on_hub(
|
68 |
full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
|
69 |
)
|
|
|
95 |
precision=precision,
|
96 |
revision= config.get("model_sha", ""),
|
97 |
still_on_hub=still_on_hub,
|
98 |
+
architecture=architecture,
|
99 |
+
display_model=display_model # set the newly added field
|
100 |
)
|
101 |
|
102 |
def update_with_request_file(self, requests_path):
|
|
|
118 |
def to_dict(self):
|
119 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
120 |
average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
121 |
+
|
122 |
+
# Use the display model name
|
123 |
+
model_to_display = self.display_model if self.display_model else self.full_model
|
124 |
+
|
125 |
data_dict = {
|
126 |
"eval_name": self.eval_name, # not a column, just a save name,
|
127 |
AutoEvalColumn.precision.name: self.precision.value.name,
|
|
|
129 |
AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
|
130 |
AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
131 |
AutoEvalColumn.architecture.name: self.architecture,
|
132 |
+
AutoEvalColumn.model.name: make_clickable_model(model_to_display), # modified part
|
133 |
AutoEvalColumn.revision.name: self.revision,
|
134 |
AutoEvalColumn.average.name: average,
|
135 |
AutoEvalColumn.license.name: self.license,
|
|
|
205 |
except KeyError: # not all eval values present
|
206 |
continue
|
207 |
|
208 |
+
return results
|