Jacqueline Garrahan committed
Commit acdf544
1 Parent(s): 8d60ca3

Check in updated leaderboard

src/about.py CHANGED
@@ -57,9 +57,9 @@ EVALUATION_QUEUE_TEXT = """
 ### 1) Make sure you can load your model and tokenizer using AutoClasses:
 ```python
 from transformers import AutoConfig, AutoModel, AutoTokenizer
-config = AutoConfig.from_pretrained("your model name", revision=revision)
-model = AutoModel.from_pretrained("your model name", revision=revision)
-tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
+config = AutoConfig.from_pretrained("your model name", revision="main")
+model = AutoModel.from_pretrained("your model name", revision="main")
+tokenizer = AutoTokenizer.from_pretrained("your model name", revision="main")
 ```
 If this step fails, follow the error messages to debug your model before submitting it. It's likely your model has been improperly uploaded.
 
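For a concrete pre-submission sanity check, the same three calls can be run against any public checkpoint; the model name below is only an example:

```python
# The load check from the text above, with an example model name substituted
# in; any public Hub checkpoint works the same way.
from transformers import AutoConfig, AutoModel, AutoTokenizer

config = AutoConfig.from_pretrained("distilbert-base-uncased", revision="main")
model = AutoModel.from_pretrained("distilbert-base-uncased", revision="main")
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased", revision="main")
```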
src/display/formatting.py CHANGED
@@ -2,8 +2,9 @@ def model_hyperlink(link, model_name):
     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
 
 
-def make_clickable_model(model_name):
-    link = f"https://huggingface.co/{model_name}"
+def make_clickable_model(full_model_name):
+    model_name = full_model_name.split("/")[1]
+    link = f"https://huggingface.co/{full_model_name}"
     return model_hyperlink(link, model_name)
 
 
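The new version assumes every submission name carries an org prefix; a bare name like "gpt2" would make the split("/")[1] lookup raise an IndexError. A minimal sketch of the behavior, where the fallback branch and the example model name are my additions, not part of the commit:

```python
def make_clickable_model(full_model_name: str) -> str:
    # Display only the model part of "org/model", but link to the full path.
    # The fallback is an assumption for names without an org prefix; the
    # committed code indexes split("/")[1] unconditionally.
    parts = full_model_name.split("/")
    model_name = parts[1] if len(parts) > 1 else full_model_name
    link = f"https://huggingface.co/{full_model_name}"
    return f'<a target="_blank" href="{link}">{model_name}</a>'

print(make_clickable_model("aiera/aiera-7b"))  # shows "aiera-7b", links to the full path
```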
src/display/utils.py CHANGED
@@ -23,22 +23,18 @@ class ColumnContent:
 ## Leaderboard columns
 auto_eval_column_dict = []
 # Init
-auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
+auto_eval_column_dict.append(["org", ColumnContent, ColumnContent("Organization", "str", True)])
 #Scores
-auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
+auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Aiera Score ⬆️", "number", True)])
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
-# Model information
-auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
-auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
-auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
-auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
+
 auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
-auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
 auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
-auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("License", "str", False)])
+
+
 
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
@@ -47,10 +43,7 @@ AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=
 @dataclass(frozen=True)
 class EvalQueueColumn:  # Queue column
     model = ColumnContent("model", "markdown", True)
-    revision = ColumnContent("revision", "str", True)
     private = ColumnContent("private", "bool", True)
-    precision = ColumnContent("precision", "str", True)
-    weight_type = ColumnContent("weight_type", "str", "?")
     status = ColumnContent("status", "str", True)
 
 ## All the model information that we might need
@@ -60,72 +53,6 @@ class ModelDetails:
     display_name: str = ""
     symbol: str = ""  # emoji
 
-
-class ModelType(Enum):
-    PT = ModelDetails(name="pretrained", symbol="🟢")
-    FT = ModelDetails(name="fine-tuned", symbol="🔶")
-    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
-    RL = ModelDetails(name="RL-tuned", symbol="🟦")
-    Unknown = ModelDetails(name="", symbol="?")
-
-    def to_str(self, separator=" "):
-        return f"{self.value.symbol}{separator}{self.value.name}"
-
-    @staticmethod
-    def from_str(type):
-        if "fine-tuned" in type or "🔶" in type:
-            return ModelType.FT
-        if "pretrained" in type or "🟢" in type:
-            return ModelType.PT
-        if "RL-tuned" in type or "🟦" in type:
-            return ModelType.RL
-        if "instruction-tuned" in type or "⭕" in type:
-            return ModelType.IFT
-        return ModelType.Unknown
-
-class WeightType(Enum):
-    Adapter = ModelDetails("Adapter")
-    Original = ModelDetails("Original")
-    Delta = ModelDetails("Delta")
-    Unknown = ModelDetails("?")
-
-    def from_str(weight):
-        if weight == "Adapter":
-            return WeightType.Adapter
-
-        elif weight == "Original":
-            return WeightType.Original
-
-        elif weight == "Delta":
-            return WeightType.Delta
-
-        else:
-            return WeightType.Unknown
-
-class Precision(Enum):
-    float16 = ModelDetails("float16")
-    bfloat16 = ModelDetails("bfloat16")
-    float32 = ModelDetails("float32")
-    #qt_8bit = ModelDetails("8bit")
-    #qt_4bit = ModelDetails("4bit")
-    #qt_GPTQ = ModelDetails("GPTQ")
-    Unknown = ModelDetails("?")
-
-    def from_str(precision):
-        if precision in ["torch.float16", "float16"]:
-            return Precision.float16
-        if precision in ["torch.bfloat16", "bfloat16"]:
-            return Precision.bfloat16
-        if precision in ["float32"]:
-            return Precision.float32
-        #if precision in ["8bit"]:
-        #    return Precision.qt_8bit
-        #if precision in ["4bit"]:
-        #    return Precision.qt_4bit
-        #if precision in ["GPTQ", "None"]:
-        #    return Precision.qt_GPTQ
-        return Precision.Unknown
-
 # Column selection
 COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 
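Since the column set is now built entirely from auto_eval_column_dict, the make_dataclass call does the real work. A self-contained sketch of the pattern; ColumnContent is redefined locally with assumed field names so the snippet runs on its own:

```python
from dataclasses import dataclass, make_dataclass

@dataclass
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

# Each [attr_name, annotation, default] triple becomes one field of the frozen
# dataclass, so columns are added or removed by editing this list alone.
auto_eval_column_dict = [
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    ["org", ColumnContent, ColumnContent("Organization", "str", True)],
    ["average", ColumnContent, ColumnContent("Aiera Score ⬆️", "number", True)],
]

AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

print(AutoEvalColumn.model.name)    # "Model"
print(AutoEvalColumn.average.name)  # "Aiera Score ⬆️"
```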
src/leaderboard/read_evals.py CHANGED
@@ -8,11 +8,10 @@ import dateutil
 import numpy as np
 
 from src.display.formatting import make_clickable_model
-from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
+from src.display.utils import AutoEvalColumn, Tasks
 from src.submission.check_validity import is_model_on_hub
 from src.envs import EXTERNAL_PROVIDERS
 
-
 @dataclass
 class EvalResult:
     """Represents one full evaluation. Built from a combination of the result and request file for a given run.
@@ -21,14 +20,8 @@ class EvalResult:
     full_model: str  # org/model (path on hub)
     org: str
     model: str
-    revision: str  # commit hash, "" if main
     results: dict
-    precision: Precision = Precision.Unknown
-    model_type: ModelType = ModelType.Unknown  # Pretrained, fine tuned, ...
-    weight_type: WeightType = WeightType.Original  # Original or Adapter
-    architecture: str = "Unknown"
     license: str = "?"
-    likes: int = 0
     num_params: int = 0
     date: str = ""  # submission date of request file
     still_on_hub: bool = False
@@ -41,9 +34,6 @@ class EvalResult:
 
         config = data.get("config")
 
-        # Precision
-        precision = Precision.from_str(config.get("model_dtype"))
-
         # Get model and org
         org_and_model = config.get("model_name", config.get("model_args", None))
         org_and_model = org_and_model.split("/", 1)
@@ -51,26 +41,20 @@ class EvalResult:
         if len(org_and_model) == 1:
            org = None
            model = org_and_model[0]
-            result_key = f"{model}_{precision.value.name}"
+            result_key = model
         else:
            org = org_and_model[0]
            model = org_and_model[1]
-            result_key = f"{org}_{model}_{precision.value.name}"
+            result_key = f"{org}_{model}"
         full_model = "/".join(org_and_model)
 
-        architecture = None
-        model_config = None
         still_on_hub = False
         if not any([org.lower() in provider for provider in EXTERNAL_PROVIDERS]):
             still_on_hub, _, model_config = is_model_on_hub(
-                full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
+                full_model, trust_remote_code=True, test_tokenizer=False
             )
-        architecture = "?"
 
-        if model_config is not None:
-            architectures = getattr(model_config, "architectures", None)
-            if architectures:
-                architecture = ";".join(architectures)
+        num_params = data.get("params")
 
         # Extract results available in this file (some results are split in several files)
         results = {}
@@ -91,10 +75,8 @@ class EvalResult:
             org=org,
             model=model,
             results=results,
-            precision=precision,
-            revision=config.get("model_sha", ""),
+            num_params=num_params,
             still_on_hub=still_on_hub,
-            architecture=architecture
         )
 
     def update_with_request_file(self, requests_path):
@@ -104,10 +86,7 @@ class EvalResult:
         #try:
         with open(request_file, "r") as f:
             request = json.load(f)
-        self.model_type = ModelType.from_str(request.get("model_type", ""))
-        self.weight_type = WeightType[request.get("weight_type", "Unknown")]
         self.license = request.get("license", "?")
-        self.likes = request.get("likes", 0)
         self.num_params = request.get("params", "?")
         self.date = request.get("submitted_time", "")
         #except Exception:
@@ -118,16 +97,10 @@ class EvalResult:
         average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
         data_dict = {
             "eval_name": self.eval_name,  # not a column, just a save name,
-            AutoEvalColumn.precision.name: self.precision.value.name,
-            AutoEvalColumn.model_type.name: self.model_type.value.name,
-            AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
-            AutoEvalColumn.weight_type.name: self.weight_type.value.name,
-            AutoEvalColumn.architecture.name: self.architecture,
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
-            AutoEvalColumn.revision.name: self.revision,
             AutoEvalColumn.average.name: average,
             AutoEvalColumn.license.name: self.license,
-            AutoEvalColumn.likes.name: self.likes,
+            AutoEvalColumn.org.name: self.org,
             AutoEvalColumn.params.name: self.num_params,
             AutoEvalColumn.still_on_hub.name: self.still_on_hub,
         }
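With precision dropped, the result_key reduces to the model path, so the leaderboard keys one entry per model. A standalone sketch of the new key logic; the helper name is mine, for illustration:

```python
def split_org_and_model(model_name: str):
    # Mirrors the org/model split in EvalResult.init_from_json_file.
    org_and_model = model_name.split("/", 1)
    if len(org_and_model) == 1:
        org, model = None, org_and_model[0]
        result_key = model
    else:
        org, model = org_and_model
        result_key = f"{org}_{model}"
    return org, model, result_key

print(split_org_and_model("gpt2"))            # (None, 'gpt2', 'gpt2')
print(split_org_and_model("aiera/aiera-7b"))  # ('aiera', 'aiera-7b', 'aiera_aiera-7b')
```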
src/populate.py CHANGED
@@ -34,7 +34,6 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
             data = json.load(fp)
 
             data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
-            data[EvalQueueColumn.revision.name] = data.get("revision", "main")
 
             all_evals.append(data)
         elif ".md" not in entry:
@@ -46,7 +45,6 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
                 data = json.load(fp)
 
                 data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
-                data[EvalQueueColumn.revision.name] = data.get("revision", "main")
                 all_evals.append(data)
 
     pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
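A minimal sketch of the status split at the end of get_evaluation_queue_df; the entries and the "FINISHED" status are illustrative, only "PENDING" and "RERUN" appear in this file:

```python
all_evals = [
    {"model": "org/model-a", "private": False, "status": "PENDING"},
    {"model": "org/model-b", "private": False, "status": "RERUN"},
    {"model": "org/model-c", "private": False, "status": "FINISHED"},
]
pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
print(len(pending_list))  # 2
```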
src/submission/check_validity.py CHANGED
@@ -33,7 +33,7 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
 
     return True, ""
 
-def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
+def is_model_on_hub(model_name: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
     """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses."""
     model_info = API.model_info(model_name, revision="main")
     try:
@@ -41,7 +41,7 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_rem
         config = model_info.config
         if test_tokenizer:
             try:
-                tk = AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+                tk = AutoTokenizer.from_pretrained(model_name, revision="main", trust_remote_code=trust_remote_code, token=token)
             except ValueError as e:
                 return (
                     False,
@@ -92,7 +92,7 @@ def already_submitted_models(requested_models_dir: str) -> set[str]:
                continue
            with open(os.path.join(root, file), "r") as f:
                info = json.load(f)
-                file_names.append(f"{info['model']}_{info['revision']}")
+                file_names.append(info['model'])
 
                # Select organisation
                if info["model"].count("/") == 0 or "submitted_time" not in info:
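A hypothetical call site for the simplified signature; the model name is illustrative:

```python
# With the revision parameter gone, every check runs against the repo's "main"
# branch. Call sites in this commit unpack three values, not the two the
# tuple[bool, str] annotation suggests.
from src.submission.check_validity import is_model_on_hub

on_hub, error, model_config = is_model_on_hub(
    "aiera/aiera-7b", trust_remote_code=True, test_tokenizer=False
)
if not on_hub:
    print(f"Rejecting submission: {error}")
```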
src/submission/submit.py CHANGED
@@ -15,12 +15,7 @@ REQUESTED_MODELS = None
 USERS_TO_SUBMISSION_DATES = None
 
 def add_new_eval(
-    model: str,
-    base_model: str,
-    revision: str,
-    precision: str,
-    weight_type: str,
-    model_type: str,
+    model: str
 ):
     global REQUESTED_MODELS
     global USERS_TO_SUBMISSION_DATES
@@ -36,27 +31,10 @@ def add_new_eval(
     precision = precision.split(" ")[0]
     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
 
-    if model_type is None or model_type == "":
-        return styled_error("Please select a model type.")
-
-    # Does the model actually exist?
-    if revision == "":
-        revision = "main"
-
-    # Is the model on the hub?
-    if weight_type in ["Delta", "Adapter"]:
-        base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=False)
-        if not base_model_on_hub:
-            return styled_error(f'Base model "{base_model}" {error}')
-
-    if not weight_type == "Adapter":
-        model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=False)
-        if not model_on_hub:
-            return styled_error(f'Model "{model}" {error}')
 
     # Is the model info correctly filled?
     try:
-        model_info = API.model_info(repo_id=model, revision=revision)
+        model_info = API.model_info(repo_id=model, revision="main")
     except Exception:
         return styled_error("Could not get your model information. Please fill it up properly.")
@@ -78,12 +56,9 @@ def add_new_eval(
     eval_entry = {
         "model": model,
         "base_model": base_model,
-        "revision": revision,
         "precision": precision,
-        "weight_type": weight_type,
         "status": "PENDING",
         "submitted_time": current_time,
-        "model_type": model_type,
         "likes": model_info.likes,
         "params": model_size,
         "license": license,
@@ -91,13 +66,13 @@ def add_new_eval(
     }
 
     # Check for duplicate submission
-    if f"{model}_{revision}_{precision}" in REQUESTED_MODELS:
+    if model in REQUESTED_MODELS:
         return styled_warning("This model has been already submitted.")
 
     print("Creating eval file")
     OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
     os.makedirs(OUT_DIR, exist_ok=True)
-    out_path = f"{OUT_DIR}/{model_path}_{revision}.json"
+    out_path = f"{OUT_DIR}/{model_path}.json"
 
     with open(out_path, "w") as f:
         f.write(json.dumps(eval_entry))
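For reference, the request file written above keeps base_model and precision keys even though they are no longer parameters of add_new_eval, so those values must come from surrounding scope. An illustrative payload with placeholder values:

```python
# Hypothetical eval request JSON produced by add_new_eval after this change;
# every value below is a placeholder, not real data.
eval_entry = {
    "model": "aiera/aiera-7b",
    "base_model": "",
    "precision": "float16",
    "status": "PENDING",
    "submitted_time": "2025-01-01T00:00:00Z",
    "likes": 0,
    "params": 7.24,
    "license": "apache-2.0",
}
```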