galb-dai committed
Commit 34a2915 · 1 Parent(s): 416ebf1

More cleanup.

README.md CHANGED
@@ -41,9 +41,9 @@ If you encounter problem on the space, don't hesitate to restart it to remove th
 # Code logic for more complex edits
 
 You'll find
-- the main table' columns names and properties in `src/display/utils.py`
-- the logic to read all results and request files, then convert them in dataframe lines, in `src/populate.py`
-- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
+- The main table' columns names and properties in `src/display/utils.py`
+- The logic to read all results and request files, then convert them in dataframe lines, in `src/populate.py`
+- The logic to allow or filter submissions in `src/submission/submit.py`.
 
 
 # Setting up the environment
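
For readers tracing the populate step mentioned above: `src/populate.py` is not touched by this commit, but the idea it implements is to read every result file and flatten it into one leaderboard row. A minimal sketch under that assumption (the function name and file layout below are hypothetical, not the repository's actual code):

```python
import json
from pathlib import Path

import pandas as pd


def results_to_dataframe(results_dir: str, columns: list[str]) -> pd.DataFrame:
    """Read every result JSON under results_dir and return one leaderboard row per file."""
    rows = []
    for path in Path(results_dir).glob("**/*.json"):
        with open(path) as f:
            record = json.load(f)
        # Keep only the columns the table displays; missing keys become NaN.
        rows.append({col: record.get(col) for col in columns})
    return pd.DataFrame(rows, columns=columns)
```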
src/about.py CHANGED
@@ -9,15 +9,11 @@ class Task:
     col_name: str
 
 
-# Select your tasks here
-# ---------------------------------------------------
 class Tasks(Enum):
-    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
     task0 = Task("FormulaOne", "success_rate", "Success Rate (%)")
 
 
-NUM_FEWSHOT = 0 # Change with your few shot
-# ---------------------------------------------------
+NUM_FEWSHOT = 0
 
 TITLE = """
 <h1 id="space-title" style="
@@ -34,7 +30,6 @@ TITLE = """
 </h1>
 """
 
-# What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
 Welcome to the official leaderboard for the paper:
 
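For context on how the trimmed-down `Tasks` enum is typically consumed (a sketch, not part of this commit; it assumes the other two `Task` fields are named `benchmark` and `metric`, as in the stock leaderboard template):

```python
from src.about import Tasks

# Map display column names to the metric key stored in result files,
# e.g. {"Success Rate (%)": "success_rate"} for the FormulaOne task.
metric_by_column = {task.value.col_name: task.value.metric for task in Tasks}
print(metric_by_column)
```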
src/datamodel/data.py CHANGED
@@ -12,10 +12,10 @@ logger = get_logger(__name__)
 class F1Data:
     def __init__(
         self,
-        cp_ds_name: str,
-        sub_ds_name: str,
-        res_ds_name: str,
-        split: str = "hard",
+        cp_ds_name: str,  # Name of the dataset. Fixed.
+        sub_ds_name: str,  # Name of subdataset. Fixed.
+        res_ds_name: str,  # Name of results repository. Fixed.
+        split: str = "hard",  # Split is either 'hard' or 'easy'.
     ):
         self.cp_dataset_name = cp_ds_name
         self.submissions_dataset_name = sub_ds_name
@@ -27,14 +27,14 @@ class F1Data:
     def _initialize(self):
         logger.info(f"Initialize F1Data TOKEN='{TOKEN}'")
         start_time = time.monotonic()
-        cp_ds = load_dataset(self.cp_dataset_name, split=self.split, token=TOKEN)
-        logger.info(
-            "Loaded code-problems dataset from %s in %f sec",
+        cp_ds = load_dataset(
             self.cp_dataset_name,
-            time.monotonic() - start_time,
+            split=self.split,
+            token=TOKEN,
         )
-        self.code_problems: dict[str, str] = {r["id"]: r["code_problem"] for r in cp_ds}
-        logger.info(f"Loaded %d code problems {len(self.code_problems)}")
+        logger.info(f"Loaded code-problems dataset from {self.cp_dataset_name} in {time.monotonic() - start_time} sec")
+        self.code_problems = {r["id"]: r["code_problem"] for r in cp_ds}  # id string -> code problem.
+        logger.info(f"Loaded {len(self.code_problems)} code problems")
 
     @functools.cached_property
     def code_problem_ids(self) -> set[str]:
@@ -43,6 +43,11 @@ class F1Data:
 
 if __name__ == "__main__":
     split = "hard"
-    f1_data = F1Data(cp_ds_name=CODE_PROBLEMS_REPO, sub_ds_name=SUBMISSIONS_REPO, res_ds_name=RESULTS_REPO, split=split)
+    f1_data = F1Data(
+        cp_ds_name=CODE_PROBLEMS_REPO,
+        sub_ds_name=SUBMISSIONS_REPO,
+        res_ds_name=RESULTS_REPO,
+        split=split,
+    )
 
     print(f"Found {len(f1_data.code_problem_ids)} code problems in {split} split of {f1_data.cp_dataset_name}")
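
The `code_problem_ids` cached property only appears as context above; its body is not part of the diff. A plausible one-liner, assuming it simply exposes the keys of `code_problems` (an illustrative sketch, not the repository's code):

```python
import functools


class F1DataSketch:
    """Illustrative stand-in for F1Data, showing only the cached property."""

    def __init__(self, code_problems: dict[str, str]):
        self.code_problems = code_problems

    @functools.cached_property
    def code_problem_ids(self) -> set[str]:
        # Computed once on first access, then cached on the instance.
        return set(self.code_problems)
```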
src/display/__init__.py ADDED
File without changes
src/display/utils.py CHANGED
@@ -1,19 +1,15 @@
-from dataclasses import dataclass, field, make_dataclass
-from typing import ClassVar
+from dataclasses import dataclass
 from enum import Enum
 
-import pandas as pd
 
-from src.about import Tasks
+def _fields(raw_class):
+    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
 
 
-def fields(raw_class):
-    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
+# These classes are for user facing column names, to avoid having to change them
+# all around the code when a modification is needed.
 
 
-# These classes are for user facing column names,
-# to avoid having to change them all around the code
-# when a modif is needed
 @dataclass
 class ColumnContent:
     name: str
@@ -23,41 +19,6 @@ class ColumnContent:
     never_hidden: bool = False
 
 
-## Leaderboard columns
-# auto_eval_column_fields = []
-# # Init
-# auto_eval_column_fields.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
-# auto_eval_column_fields.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
-# # Scores
-# auto_eval_column_fields.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
-# for task in Tasks:
-#     auto_eval_column_fields.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
-# # Model information
-# auto_eval_column_fields.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
-# auto_eval_column_fields.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
-# auto_eval_column_fields.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-# auto_eval_column_fields.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
-# auto_eval_column_fields.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
-# auto_eval_column_fields.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
-# auto_eval_column_fields.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
-# auto_eval_column_fields.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
-# auto_eval_column_fields.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
-#
-#
-#
-# def make_classvar_dataclass(name: str, spec: list):
-#     ns = {"__annotations__": {}}
-#     for field_name, field_type, default in spec:
-#         # Mark as ClassVar so dataclass doesn't treat it as an instance field
-#         ns["__annotations__"][field_name] = ClassVar[field_type]
-#         ns[field_name] = default
-#     # No instance fields; just class-level descriptors
-#     return make_dataclass(name, [], frozen=True, namespace=ns)
-#
-# # We use make dataclass to dynamically fill the scores from Tasks
-# AutoEvalColumn = make_classvar_dataclass("AutoEvalColumn", auto_eval_column_fields)
-
-
 @dataclass(frozen=True)
 class AutoEvalColumn:
     system = ColumnContent("System Name", "markdown", True, never_hidden=True)
@@ -68,18 +29,18 @@ class AutoEvalColumn:
     submitted_on = ColumnContent("Submitted On", "datetime", True)
 
 
-## For the queue columns in the submission tab
+# For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn:  # Queue column
     model = ColumnContent("model", "markdown", True)
     revision = ColumnContent("revision", "str", True)
     private = ColumnContent("private", "bool", True)
     precision = ColumnContent("precision", "str", True)
-    weight_type = ColumnContent("weight_type", "str", "Original")
+    weight_type = ColumnContent("weight_type", "str", True)
    status = ColumnContent("status", "str", True)
 
 
-## All the model information that we might need
+# All the model information that we might need
 @dataclass
 class ModelDetails:
     name: str
@@ -90,8 +51,6 @@ class ModelDetails:
 class ModelType(Enum):
     LLM = ModelDetails(name="LLM", symbol="🟢")
     AgenticLLM = ModelDetails(name="AgenticLLM", symbol="🔶")
-    # IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
-    # RL = ModelDetails(name="RL-tuned", symbol="🟦")
     Other = ModelDetails(name="Other", symbol="?")
 
     def to_str(self, separator=" "):
@@ -103,10 +62,6 @@ class ModelType(Enum):
             return ModelType.AgenticLLM
         if "LLM" in type or "🟢" in type:
             return ModelType.LLM
-        # if "RL-tuned" in type or "🟦" in type:
-        #     return ModelType.RL
-        # if "instruction-tuned" in type or "⭕" in type:
-        #     return ModelType.IFT
         return ModelType.Other
 
 
@@ -130,9 +85,7 @@ class Precision(Enum):
 
 
 # Column selection
-COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
-
-EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
-EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
+COLS = [c.name for c in _fields(AutoEvalColumn) if not c.hidden]
 
-# BENCHMARK_COLS = [t.value.col_name for t in Tasks]
+EVAL_COLS = [c.name for c in _fields(EvalQueueColumn)]
+EVAL_TYPES = [c.type for c in _fields(EvalQueueColumn)]
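
To make the column plumbing concrete: `_fields` walks the class attributes of the frozen dataclasses, and `COLS`/`EVAL_COLS` then drive the shape of the tables. A small usage sketch (not from the repository):

```python
import pandas as pd

from src.display.utils import COLS, EVAL_COLS, EVAL_TYPES

# An empty leaderboard frame with the user-facing column names,
# ready to be filled by the populate step.
leaderboard_df = pd.DataFrame(columns=COLS)

# The submission-queue table pairs each column with its display type.
print(list(zip(EVAL_COLS, EVAL_TYPES)))
```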
src/logger.py CHANGED
@@ -2,7 +2,10 @@ import logging
 import sys
 
 
-def get_logger(filename: str, level=logging.INFO) -> logging.Logger:
+def get_logger(
+    filename: str,
+    level=logging.INFO,
+) -> logging.Logger:
     new_logger = logging.getLogger(filename)
     fmt = logging.Formatter(fmt="%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s")
     handler = logging.StreamHandler(sys.stderr)
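
Call sites are unaffected by the signature reflow; usage stays as elsewhere in the repo, for example:

```python
from src.logger import get_logger

logger = get_logger(__name__)
logger.info("Leaderboard space starting up")
```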
src/submission/check_validity.py DELETED
@@ -1,115 +0,0 @@
-import json
-import os
-import re
-from collections import defaultdict
-from datetime import datetime, timedelta, timezone
-
-import huggingface_hub
-from datasets import get_dataset_config_names
-from huggingface_hub import ModelCard
-from huggingface_hub.hf_api import ModelInfo
-from transformers import AutoConfig
-from transformers.models.auto.tokenization_auto import AutoTokenizer
-
-from src.envs import SUBMISSIONS_REPO
-
-
-def check_model_card(repo_id: str) -> tuple[bool, str]:
-    """Checks if the model card and license exist and have been filled"""
-    try:
-        card = ModelCard.load(repo_id)
-    except huggingface_hub.utils.EntryNotFoundError:
-        return False, "Please add a model card to your model to explain how you trained/fine-tuned it."
-
-    # Enforce license metadata
-    if card.data.license is None:
-        if not ("license_name" in card.data and "license_link" in card.data):
-            return False, (
-                "License not found. Please add a license to your model card using the `license` metadata or a"
-                " `license_name`/`license_link` pair."
-            )
-
-    # Enforce card content
-    if len(card.text) < 200:
-        return False, "Please add a description to your model card, it is too short."
-
-    return True, ""
-
-
-def is_model_on_hub(
-    model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False
-) -> tuple[bool, str]:
-    """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses."""
-    try:
-        config = AutoConfig.from_pretrained(
-            model_name, revision=revision, trust_remote_code=trust_remote_code, token=token
-        )
-        if test_tokenizer:
-            try:
-                AutoTokenizer.from_pretrained(
-                    model_name,
-                    revision=revision,
-                    trust_remote_code=trust_remote_code,
-                    token=token,
-                )
-            except ValueError as e:
-                return (False, f"uses a tokenizer which is not in a transformers release: {e}", None)
-            except Exception as e:
-                return (
-                    False,
-                    "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?",
-                    None,
-                )
-        return True, None, config
-
-    except ValueError:
-        return (
-            False,
-            "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
-            None,
-        )
-
-    except Exception as e:
-        return False, "was not found on hub!", None
-
-
-def get_model_size(model_info: ModelInfo, precision: str):
-    """Gets the model size from the configuration, or the model name if the configuration does not contain the information."""
-    try:
-        model_size = round(model_info.safetensors["total"] / 1e9, 3)
-    except (AttributeError, TypeError):
-        return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
-
-    size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.modelId.lower()) else 1
-    model_size = size_factor * model_size
-    return model_size
-
-
-def get_model_arch(model_info: ModelInfo):
-    """Gets the model architecture from the configuration"""
-    return model_info.config.get("architectures", "Unknown")
-
-
-def already_submitted_models(requested_models_dir: str) -> set[str]:
-    """Gather a list of already submitted models to avoid duplicates"""
-    depth = 1
-    file_names = []
-    users_to_submission_dates = defaultdict(list)
-
-    for root, _, files in os.walk(requested_models_dir):
-        current_depth = root.count(os.sep) - requested_models_dir.count(os.sep)
-        if current_depth == depth:
-            for file in files:
-                if not file.endswith(".json"):
-                    continue
-                with open(os.path.join(root, file), "r") as f:
-                    info = json.load(f)
-                file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}")
-
-                # Select organisation
-                if info["model"].count("/") == 0 or "submitted_time" not in info:
-                    continue
-                organisation, _ = info["model"].split("/")
-                users_to_submission_dates[organisation].append(info["submitted_time"])
-
-    return set(file_names), users_to_submission_dates
src/submission/submit.py CHANGED
@@ -90,5 +90,6 @@ def add_new_solutions(
     ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
 
     return styled_message(
-        "Your request has been submitted to the evaluation queue!\nResults may take up to 24 hours to be processed and shown in the leaderboard."
+        "Your request has been submitted to the evaluation queue!\n"
+        + "Results may take up to 24 hours to be processed and shown in the leaderboard."
    )
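
The message is now split into two adjacent literals joined with `+`; the rendered text is unchanged. `styled_message` itself is not part of this commit; in the stock leaderboard template it is a small HTML wrapper along these lines (the exact markup here is an assumption):

```python
def styled_message(message: str) -> str:
    # Hypothetical sketch: wrap the confirmation text so Gradio renders it
    # as a green success banner.
    return f"<span style='color: green; font-size: 20px; text-align: center;'>{message}</span>"
```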