xeon27 committed on
Commit
954d8ee
·
1 Parent(s): 8471f6d

Change model names to reflect version

Browse files
refactor_eval_results.py CHANGED
@@ -30,7 +30,7 @@ METRIC_NAME = {
30
 
31
  MODEL_SHA_MAP = {
32
  # open source models
33
- "c4ai-command-r-plus": "https://huggingface.co/CohereForAI/c4ai-command-r-plus", # TODO: verify for the 08-2024 version
34
  "Meta-Llama-3.1-70B-Instruct": "https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct",
35
  "Mistral-Large-Instruct-2407": "https://huggingface.co/mistralai/Mistral-Large-Instruct-2407",
36
  "Qwen2.5-72B-Instruct": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
@@ -44,6 +44,22 @@ MODEL_SHA_MAP = {
44
  "o1": "https://openai.com/o1",
45
  }
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  AGENTIC_LOG_MODEL_NAME_MAP = {
48
  "claude-3-5-sonnet-20241022": "claude-3-5-sonnet-20241022",
49
  "gemini-1.5-pro": "gemini-1.5-pro-002",
@@ -150,6 +166,7 @@ def main():
150
  requests = {
151
  "model": model_name,
152
  "model_sha": MODEL_SHA_MAP[model_name],
 
153
  "base_model": "",
154
  "revision": "main",
155
  "private": False,
 
30
 
31
  MODEL_SHA_MAP = {
32
  # open source models
33
+ "c4ai-command-r-plus": "https://huggingface.co/CohereForAI/c4ai-command-r-plus",
34
  "Meta-Llama-3.1-70B-Instruct": "https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct",
35
  "Mistral-Large-Instruct-2407": "https://huggingface.co/mistralai/Mistral-Large-Instruct-2407",
36
  "Qwen2.5-72B-Instruct": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
 
44
  "o1": "https://openai.com/o1",
45
  }
46
 
47
+ MODEL_VERSION_MAP = {
48
+ # open source models
49
+ "c4ai-command-r-plus": "c4ai-command-r-plus",
50
+ "Meta-Llama-3.1-70B-Instruct": "Llama-3.1-70B-Instruct",
51
+ "Mistral-Large-Instruct-2407": "Mistral-Large-Instruct-2407",
52
+ "Qwen2.5-72B-Instruct": "Qwen2.5-72B-Instruct",
53
+
54
+ # closed source models
55
+ "claude-3-5-sonnet-20241022": "Claude-3.5-Sonnet-20241022",
56
+ "gemini-1.5-flash": "Gemini-1.5-Flash",
57
+ "gemini-1.5-pro": "Gemini-1.5-Pro-002",
58
+ "gpt-4o": "GPT-4o-20240806",
59
+ "gpt-4o-mini": "GPT-4o-mini-20240718",
60
+ "o1": "o1-20241217",
61
+ }
62
+
63
  AGENTIC_LOG_MODEL_NAME_MAP = {
64
  "claude-3-5-sonnet-20241022": "claude-3-5-sonnet-20241022",
65
  "gemini-1.5-pro": "gemini-1.5-pro-002",
 
166
  requests = {
167
  "model": model_name,
168
  "model_sha": MODEL_SHA_MAP[model_name],
169
+ "model_version": MODEL_VERSION_MAP[model_name],
170
  "base_model": "",
171
  "revision": "main",
172
  "private": False,
src/leaderboard/read_evals.py CHANGED
@@ -20,6 +20,7 @@ class EvalResult:
20
  full_model: str # org/model (path on hub)
21
  org: str
22
  model: str
 
23
  revision: str # commit hash, "" if main
24
  results: dict
25
  precision: Precision = Precision.Unknown
@@ -103,6 +104,7 @@ class EvalResult:
103
  with open(request_file, "r") as f:
104
  request = json.load(f)
105
  self.model_type = ModelType.from_str(request.get("model_type", ""))
 
106
  self.weight_type = WeightType[request.get("weight_type", "Original")]
107
  self.license = request.get("license", "?")
108
  self.likes = request.get("likes", 0)
@@ -115,7 +117,7 @@ class EvalResult:
115
  """Converts the Eval Result to a dict compatible with our dataframe display"""
116
  data_dict = {
117
  "eval_name": self.eval_name, # not a column, just a save name,
118
- AutoEvalColumn.model.name: make_clickable_model(self.full_model, self.revision),
119
  }
120
 
121
  for task in Tasks:
 
20
  full_model: str # org/model (path on hub)
21
  org: str
22
  model: str
23
+ model_version: str
24
  revision: str # commit hash, "" if main
25
  results: dict
26
  precision: Precision = Precision.Unknown
 
104
  with open(request_file, "r") as f:
105
  request = json.load(f)
106
  self.model_type = ModelType.from_str(request.get("model_type", ""))
107
+ self.model_version = request.get("model_version", "")
108
  self.weight_type = WeightType[request.get("weight_type", "Original")]
109
  self.license = request.get("license", "?")
110
  self.likes = request.get("likes", 0)
 
117
  """Converts the Eval Result to a dict compatible with our dataframe display"""
118
  data_dict = {
119
  "eval_name": self.eval_name, # not a column, just a save name,
120
+ AutoEvalColumn.model.name: make_clickable_model(self.model_version, self.revision),
121
  }
122
 
123
  for task in Tasks: