xeon27
committed on
Commit
·
954d8ee
1
Parent(s):
8471f6d
Change model names to reflect version
Browse files
- refactor_eval_results.py +18 -1
- src/leaderboard/read_evals.py +3 -1
refactor_eval_results.py
CHANGED
@@ -30,7 +30,7 @@ METRIC_NAME = {
|
|
30 |
|
31 |
MODEL_SHA_MAP = {
|
32 |
# open source models
|
33 |
-
"c4ai-command-r-plus": "https://huggingface.co/CohereForAI/c4ai-command-r-plus",
|
34 |
"Meta-Llama-3.1-70B-Instruct": "https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct",
|
35 |
"Mistral-Large-Instruct-2407": "https://huggingface.co/mistralai/Mistral-Large-Instruct-2407",
|
36 |
"Qwen2.5-72B-Instruct": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
|
@@ -44,6 +44,22 @@ MODEL_SHA_MAP = {
|
|
44 |
"o1": "https://openai.com/o1",
|
45 |
}
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
AGENTIC_LOG_MODEL_NAME_MAP = {
|
48 |
"claude-3-5-sonnet-20241022": "claude-3-5-sonnet-20241022",
|
49 |
"gemini-1.5-pro": "gemini-1.5-pro-002",
|
@@ -150,6 +166,7 @@ def main():
|
|
150 |
requests = {
|
151 |
"model": model_name,
|
152 |
"model_sha": MODEL_SHA_MAP[model_name],
|
|
|
153 |
"base_model": "",
|
154 |
"revision": "main",
|
155 |
"private": False,
|
|
|
30 |
|
31 |
MODEL_SHA_MAP = {
|
32 |
# open source models
|
33 |
+
"c4ai-command-r-plus": "https://huggingface.co/CohereForAI/c4ai-command-r-plus",
|
34 |
"Meta-Llama-3.1-70B-Instruct": "https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct",
|
35 |
"Mistral-Large-Instruct-2407": "https://huggingface.co/mistralai/Mistral-Large-Instruct-2407",
|
36 |
"Qwen2.5-72B-Instruct": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
|
|
|
44 |
"o1": "https://openai.com/o1",
|
45 |
}
|
46 |
|
47 |
+
MODEL_VERSION_MAP = {
|
48 |
+
# open source models
|
49 |
+
"c4ai-command-r-plus": "c4ai-command-r-plus",
|
50 |
+
"Meta-Llama-3.1-70B-Instruct": "Llama-3.1-70B-Instruct",
|
51 |
+
"Mistral-Large-Instruct-2407": "Mistral-Large-Instruct-2407",
|
52 |
+
"Qwen2.5-72B-Instruct": "Qwen2.5-72B-Instruct",
|
53 |
+
|
54 |
+
# closed source models
|
55 |
+
"claude-3-5-sonnet-20241022": "Claude-3.5-Sonnet-20241022",
|
56 |
+
"gemini-1.5-flash": "Gemini-1.5-Flash",
|
57 |
+
"gemini-1.5-pro": "Gemini-1.5-Pro-002",
|
58 |
+
"gpt-4o": "GPT-4o-20240806",
|
59 |
+
"gpt-4o-mini": "GPT-4o-mini-20240718",
|
60 |
+
"o1": "o1-20241217",
|
61 |
+
}
|
62 |
+
|
63 |
AGENTIC_LOG_MODEL_NAME_MAP = {
|
64 |
"claude-3-5-sonnet-20241022": "claude-3-5-sonnet-20241022",
|
65 |
"gemini-1.5-pro": "gemini-1.5-pro-002",
|
|
|
166 |
requests = {
|
167 |
"model": model_name,
|
168 |
"model_sha": MODEL_SHA_MAP[model_name],
|
169 |
+
"model_version": MODEL_VERSION_MAP[model_name],
|
170 |
"base_model": "",
|
171 |
"revision": "main",
|
172 |
"private": False,
|
src/leaderboard/read_evals.py
CHANGED
@@ -20,6 +20,7 @@ class EvalResult:
|
|
20 |
full_model: str # org/model (path on hub)
|
21 |
org: str
|
22 |
model: str
|
|
|
23 |
revision: str # commit hash, "" if main
|
24 |
results: dict
|
25 |
precision: Precision = Precision.Unknown
|
@@ -103,6 +104,7 @@ class EvalResult:
|
|
103 |
with open(request_file, "r") as f:
|
104 |
request = json.load(f)
|
105 |
self.model_type = ModelType.from_str(request.get("model_type", ""))
|
|
|
106 |
self.weight_type = WeightType[request.get("weight_type", "Original")]
|
107 |
self.license = request.get("license", "?")
|
108 |
self.likes = request.get("likes", 0)
|
@@ -115,7 +117,7 @@ class EvalResult:
|
|
115 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
116 |
data_dict = {
|
117 |
"eval_name": self.eval_name, # not a column, just a save name,
|
118 |
-
AutoEvalColumn.model.name: make_clickable_model(self.
|
119 |
}
|
120 |
|
121 |
for task in Tasks:
|
|
|
20 |
full_model: str # org/model (path on hub)
|
21 |
org: str
|
22 |
model: str
|
23 |
+
model_version: str
|
24 |
revision: str # commit hash, "" if main
|
25 |
results: dict
|
26 |
precision: Precision = Precision.Unknown
|
|
|
104 |
with open(request_file, "r") as f:
|
105 |
request = json.load(f)
|
106 |
self.model_type = ModelType.from_str(request.get("model_type", ""))
|
107 |
+
self.model_version = request.get("model_version", "")
|
108 |
self.weight_type = WeightType[request.get("weight_type", "Original")]
|
109 |
self.license = request.get("license", "?")
|
110 |
self.likes = request.get("likes", 0)
|
|
|
117 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
118 |
data_dict = {
|
119 |
"eval_name": self.eval_name, # not a column, just a save name,
|
120 |
+
AutoEvalColumn.model.name: make_clickable_model(self.model_version, self.revision),
|
121 |
}
|
122 |
|
123 |
for task in Tasks:
|