More cleanup.
Files changed:
- README.md +3 -3
- src/about.py +1 -6
- src/datamodel/data.py +16 -11
- src/display/__init__.py +0 -0
- src/display/utils.py +11 -58
- src/logger.py +4 -1
- src/submission/check_validity.py +0 -115
- src/submission/submit.py +2 -1
README.md
CHANGED
@@ -41,9 +41,9 @@ If you encounter problem on the space, don't hesitate to restart it to remove th
 # Code logic for more complex edits
 
 You'll find
-
-
-
+- The main table's column names and properties in `src/display/utils.py`
+- The logic to read all results and request files, then convert them into dataframe lines, in `src/populate.py`
+- The logic to allow or filter submissions in `src/submission/submit.py`.
 
 
 # Setting up the environment
src/about.py
CHANGED
@@ -9,15 +9,11 @@ class Task:
     col_name: str
 
 
-# Select your tasks here
-# ---------------------------------------------------
 class Tasks(Enum):
-    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
     task0 = Task("FormulaOne", "success_rate", "Success Rate (%)")
 
 
-NUM_FEWSHOT = 0
-# ---------------------------------------------------
+NUM_FEWSHOT = 0
 
 TITLE = """
 <h1 id="space-title" style="
@@ -34,7 +30,6 @@ TITLE = """
 </h1>
 """
 
-# What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
 Welcome to the official leaderboard for the paper:
 
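Note: the `Tasks` enum above is what the rest of the template iterates to decide which score columns to render; each member wraps a `Task` whose `col_name` becomes a leaderboard header. A minimal sketch of that consumption pattern (the `benchmark` and `metric` field names are assumptions based on the removed "task_key / metric_key" comment, not shown in this diff):

from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str  # key of the task in the results json (assumed field name)
    metric: str     # key of the metric in the results json (assumed field name)
    col_name: str   # column header displayed in the leaderboard

class Tasks(Enum):
    task0 = Task("FormulaOne", "success_rate", "Success Rate (%)")

# A populate step would typically walk the enum to build the score columns:
for task in Tasks:
    print(task.name, "->", task.value.col_name)  # task0 -> Success Rate (%)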
src/datamodel/data.py
CHANGED
@@ -12,10 +12,10 @@ logger = get_logger(__name__)
 class F1Data:
     def __init__(
         self,
-        cp_ds_name: str,
-        sub_ds_name: str,
-        res_ds_name: str,
-        split: str = "hard",
+        cp_ds_name: str,  # Name of the code-problems dataset. Fixed.
+        sub_ds_name: str,  # Name of the submissions dataset. Fixed.
+        res_ds_name: str,  # Name of the results repository. Fixed.
+        split: str = "hard",  # Split is either 'hard' or 'easy'.
     ):
         self.cp_dataset_name = cp_ds_name
         self.submissions_dataset_name = sub_ds_name
@@ -27,14 +27,14 @@ class F1Data:
     def _initialize(self):
         logger.info(f"Initialize F1Data TOKEN='{TOKEN}'")
         start_time = time.monotonic()
-        cp_ds = load_dataset(
-        logger.info(
-            "Loaded code-problems dataset from %s in %f sec",
+        cp_ds = load_dataset(
             self.cp_dataset_name,
-
+            split=self.split,
+            token=TOKEN,
         )
-
-
+        logger.info(f"Loaded code-problems dataset from {self.cp_dataset_name} in {time.monotonic() - start_time} sec")
+        self.code_problems = {r["id"]: r["code_problem"] for r in cp_ds}  # id string -> code problem.
+        logger.info(f"Loaded {len(self.code_problems)} code problems")
 
     @functools.cached_property
     def code_problem_ids(self) -> set[str]:
@@ -43,6 +43,11 @@ class F1Data:
 
 if __name__ == "__main__":
     split = "hard"
-    f1_data = F1Data(
+    f1_data = F1Data(
+        cp_ds_name=CODE_PROBLEMS_REPO,
+        sub_ds_name=SUBMISSIONS_REPO,
+        res_ds_name=RESULTS_REPO,
+        split=split,
+    )
 
     print(f"Found {len(f1_data.code_problem_ids)} code problems in {split} split of {f1_data.cp_dataset_name}")
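Note: the hunk shows the `code_problem_ids` signature but not its body; since `_initialize` builds `self.code_problems` as an id-to-problem dict, the cached property presumably just exposes the key set. A self-contained sketch of the `functools.cached_property` behavior being relied on (the `Demo` class is hypothetical):

import functools

class Demo:
    def __init__(self):
        # Stand-in for the dict built from the loaded dataset.
        self.code_problems = {"p1": "...", "p2": "..."}

    @functools.cached_property
    def code_problem_ids(self) -> set[str]:
        print("computing")  # executes only on first access
        return set(self.code_problems)

d = Demo()
print(d.code_problem_ids)  # prints "computing", then the id set
print(d.code_problem_ids)  # served from the instance cache; no recompute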
src/display/__init__.py
ADDED
Empty file (marks src/display as a regular Python package).
src/display/utils.py
CHANGED
@@ -1,19 +1,15 @@
-from dataclasses import dataclass
-from typing import ClassVar
+from dataclasses import dataclass
 from enum import Enum
 
-import pandas as pd
 
-
+def _fields(raw_class):
+    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
 
 
-
-
+# These classes are for user facing column names, to avoid having to change them
+# all around the code when a modification is needed.
 
 
-# These classes are for user facing column names,
-# to avoid having to change them all around the code
-# when a modif is needed
 @dataclass
 class ColumnContent:
     name: str
@@ -23,41 +19,6 @@ class ColumnContent:
     never_hidden: bool = False
 
 
-## Leaderboard columns
-# auto_eval_column_fields = []
-# # Init
-# auto_eval_column_fields.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
-# auto_eval_column_fields.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
-# # Scores
-# auto_eval_column_fields.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
-# for task in Tasks:
-#     auto_eval_column_fields.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
-# # Model information
-# auto_eval_column_fields.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
-# auto_eval_column_fields.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
-# auto_eval_column_fields.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-# auto_eval_column_fields.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
-# auto_eval_column_fields.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
-# auto_eval_column_fields.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
-# auto_eval_column_fields.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
-# auto_eval_column_fields.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
-# auto_eval_column_fields.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
-#
-#
-#
-# def make_classvar_dataclass(name: str, spec: list):
-#     ns = {"__annotations__": {}}
-#     for field_name, field_type, default in spec:
-#         # Mark as ClassVar so dataclass doesn't treat it as an instance field
-#         ns["__annotations__"][field_name] = ClassVar[field_type]
-#         ns[field_name] = default
-#     # No instance fields; just class-level descriptors
-#     return make_dataclass(name, [], frozen=True, namespace=ns)
-#
-# # We use make dataclass to dynamically fill the scores from Tasks
-# AutoEvalColumn = make_classvar_dataclass("AutoEvalColumn", auto_eval_column_fields)
-
-
 @dataclass(frozen=True)
 class AutoEvalColumn:
     system = ColumnContent("System Name", "markdown", True, never_hidden=True)
@@ -68,18 +29,18 @@ class AutoEvalColumn:
     submitted_on = ColumnContent("Submitted On", "datetime", True)
 
 
-
+# For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn:  # Queue column
     model = ColumnContent("model", "markdown", True)
     revision = ColumnContent("revision", "str", True)
     private = ColumnContent("private", "bool", True)
     precision = ColumnContent("precision", "str", True)
-    weight_type = ColumnContent("weight_type", "str",
+    weight_type = ColumnContent("weight_type", "str", True)
     status = ColumnContent("status", "str", True)
 
 
-
+# All the model information that we might need
 @dataclass
 class ModelDetails:
     name: str
@@ -90,8 +51,6 @@ class ModelDetails:
 class ModelType(Enum):
     LLM = ModelDetails(name="LLM", symbol="🟢")
     AgenticLLM = ModelDetails(name="AgenticLLM", symbol="🔶")
-    # IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
-    # RL = ModelDetails(name="RL-tuned", symbol="🟦")
    Other = ModelDetails(name="Other", symbol="?")
 
     def to_str(self, separator=" "):
@@ -103,10 +62,6 @@ class ModelType(Enum):
         return ModelType.AgenticLLM
     if "LLM" in type or "🟢" in type:
         return ModelType.LLM
-    # if "RL-tuned" in type or "🟦" in type:
-    #     return ModelType.RL
-    # if "instruction-tuned" in type or "⭕" in type:
-    #     return ModelType.IFT
     return ModelType.Other
 
 
@@ -130,9 +85,7 @@ class Precision(Enum):
 
 
 # Column selection
-COLS = [c.name for c in
-
-EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
-EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
+COLS = [c.name for c in _fields(AutoEvalColumn) if not c.hidden]
 
-
+EVAL_COLS = [c.name for c in _fields(EvalQueueColumn)]
+EVAL_TYPES = [c.type for c in _fields(EvalQueueColumn)]
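Note: the new `_fields` helper exists because these column classes declare their columns as plain, unannotated class attributes, which `dataclasses.fields` cannot see (it only reports annotated fields). The old `EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]` would therefore have produced an empty list, which is presumably what this change fixes. A minimal sketch of the difference, using a hypothetical `Cols` class:

from dataclasses import dataclass, fields

@dataclass(frozen=True)
class Cols:
    a = "x"  # unannotated class attribute: invisible to dataclasses.fields
    b = "y"

def _fields(raw_class):
    # Everything in the class namespace that is not a dunder.
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]

print(fields(Cols))   # prints () because no annotated fields were declared
print(_fields(Cols))  # ['x', 'y']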
src/logger.py
CHANGED
@@ -2,7 +2,10 @@ import logging
 import sys
 
 
-def get_logger(
+def get_logger(
+    filename: str,
+    level=logging.INFO,
+) -> logging.Logger:
     new_logger = logging.getLogger(filename)
     fmt = logging.Formatter(fmt="%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s")
     handler = logging.StreamHandler(sys.stderr)
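Note: with the signature spelled out, callers can override the level per logger; src/datamodel/data.py already obtains its logger as `logger = get_logger(__name__)`. A usage sketch within this codebase, assuming the function body applies `level` via `setLevel` (that part is not shown in this hunk):

import logging

from src.logger import get_logger

logger = get_logger(__name__)                              # defaults to INFO
debug_logger = get_logger("verbose", level=logging.DEBUG)  # per-logger override

logger.info("Loaded %d problems", 42)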
src/submission/check_validity.py
DELETED
@@ -1,115 +0,0 @@
-import json
-import os
-import re
-from collections import defaultdict
-from datetime import datetime, timedelta, timezone
-
-import huggingface_hub
-from datasets import get_dataset_config_names
-from huggingface_hub import ModelCard
-from huggingface_hub.hf_api import ModelInfo
-from transformers import AutoConfig
-from transformers.models.auto.tokenization_auto import AutoTokenizer
-
-from src.envs import SUBMISSIONS_REPO
-
-
-def check_model_card(repo_id: str) -> tuple[bool, str]:
-    """Checks if the model card and license exist and have been filled"""
-    try:
-        card = ModelCard.load(repo_id)
-    except huggingface_hub.utils.EntryNotFoundError:
-        return False, "Please add a model card to your model to explain how you trained/fine-tuned it."
-
-    # Enforce license metadata
-    if card.data.license is None:
-        if not ("license_name" in card.data and "license_link" in card.data):
-            return False, (
-                "License not found. Please add a license to your model card using the `license` metadata or a"
-                " `license_name`/`license_link` pair."
-            )
-
-    # Enforce card content
-    if len(card.text) < 200:
-        return False, "Please add a description to your model card, it is too short."
-
-    return True, ""
-
-
-def is_model_on_hub(
-    model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False
-) -> tuple[bool, str]:
-    """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses."""
-    try:
-        config = AutoConfig.from_pretrained(
-            model_name, revision=revision, trust_remote_code=trust_remote_code, token=token
-        )
-        if test_tokenizer:
-            try:
-                AutoTokenizer.from_pretrained(
-                    model_name,
-                    revision=revision,
-                    trust_remote_code=trust_remote_code,
-                    token=token,
-                )
-            except ValueError as e:
-                return (False, f"uses a tokenizer which is not in a transformers release: {e}", None)
-            except Exception as e:
-                return (
-                    False,
-                    "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?",
-                    None,
-                )
-        return True, None, config
-
-    except ValueError:
-        return (
-            False,
-            "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
-            None,
-        )
-
-    except Exception as e:
-        return False, "was not found on hub!", None
-
-
-def get_model_size(model_info: ModelInfo, precision: str):
-    """Gets the model size from the configuration, or the model name if the configuration does not contain the information."""
-    try:
-        model_size = round(model_info.safetensors["total"] / 1e9, 3)
-    except (AttributeError, TypeError):
-        return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
-
-    size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.modelId.lower()) else 1
-    model_size = size_factor * model_size
-    return model_size
-
-
-def get_model_arch(model_info: ModelInfo):
-    """Gets the model architecture from the configuration"""
-    return model_info.config.get("architectures", "Unknown")
-
-
-def already_submitted_models(requested_models_dir: str) -> set[str]:
-    """Gather a list of already submitted models to avoid duplicates"""
-    depth = 1
-    file_names = []
-    users_to_submission_dates = defaultdict(list)
-
-    for root, _, files in os.walk(requested_models_dir):
-        current_depth = root.count(os.sep) - requested_models_dir.count(os.sep)
-        if current_depth == depth:
-            for file in files:
-                if not file.endswith(".json"):
-                    continue
-                with open(os.path.join(root, file), "r") as f:
-                    info = json.load(f)
-                file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}")
-
-                # Select organisation
-                if info["model"].count("/") == 0 or "submitted_time" not in info:
-                    continue
-                organisation, _ = info["model"].split("/")
-                users_to_submission_dates[organisation].append(info["submitted_time"])
-
-    return set(file_names), users_to_submission_dates
src/submission/submit.py
CHANGED
@@ -90,5 +90,6 @@ def add_new_solutions(
     ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
 
     return styled_message(
-        "Your request has been submitted to the evaluation queue!\
+        "Your request has been submitted to the evaluation queue!\n"
+        + "Results may take up to 24 hours to be processed and shown in the leaderboard."
     )
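Note: the removed line ended in a bare backslash continuation inside the string literal (the rest of the old message is not recoverable from this view); the replacement builds the message from two explicit literals. A quick illustration that the explicit `+` form and Python's implicit adjacent-literal concatenation yield the same string:

# Implicit concatenation: adjacent literals are merged at compile time.
msg_a = ("Your request has been submitted to the evaluation queue!\n"
         "Results may take up to 24 hours to be processed and shown in the leaderboard.")

# The commit's explicit form, with a leading "+" on the continuation line.
msg_b = ("Your request has been submitted to the evaluation queue!\n"
         + "Results may take up to 24 hours to be processed and shown in the leaderboard.")

assert msg_a == msg_b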
|