# tofu_leaderboard/uploads.py
import datetime
import os
from email.utils import parseaddr

from huggingface_hub import HfApi

OWNER = "locuslab"
SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
RESULTS_DATASET = f"{OWNER}/results_public"
LEADERBOARD_PATH = f"{OWNER}/tofu_leaderboard"

api = HfApi()
TOKEN = os.environ.get("TOKEN", None)
YEAR_VERSION = "2024"


def format_error(msg):
    return f"<p style='color: red; font-size: 20px; text-align: center;'>{msg}</p>"


def format_warning(msg):
    return f"<p style='color: orange; font-size: 20px; text-align: center;'>{msg}</p>"


def format_log(msg):
    return f"<p style='color: green; font-size: 20px; text-align: center;'>{msg}</p>"


def model_hyperlink(link, model_name):
    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'


def add_new_eval(
    val_or_test: str,
    model: str,
    model_family: str,
    system_prompt: str,
    url: str,
    path_to_file: str,
    organisation: str,
    mail: str,
):
    # Very basic email parsing
    _, parsed_mail = parseaddr(mail)
    if "@" not in parsed_mail:
        return format_warning("Please provide a valid email address.")

    print("Adding new eval")

    # Warn if this model/organisation combination has already been submitted
    # if model.lower() in set(eval_results[val_or_test]["model"]) and organisation.lower() in set(eval_results[val_or_test]["organisation"]):
    #     return format_warning("This model has already been submitted.")

    if path_to_file is None:
        return format_warning("Please attach a file.")

    # Save submitted file
    api.upload_file(
        repo_id=SUBMISSION_DATASET,
        path_or_fileobj=path_to_file.name,
        path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_raw_{datetime.datetime.today()}.jsonl",
        repo_type="dataset",
        token=TOKEN,
    )

    # Compute score
    # Save scored file
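    # NOTE: the scoring step is not implemented in this function; the upload below
    # assumes a pre-scored file already exists locally at scored/{organisation}_{model}.jsonl.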
    api.upload_file(
        repo_id=SUBMISSION_DATASET,
        path_or_fileobj=f"scored/{organisation}_{model}.jsonl",
        path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_scored_{datetime.datetime.today()}.jsonl",
        repo_type="dataset",
        token=TOKEN,
    )

    # Actual submission
    eval_entry = {
        "model": model,
        "model_family": model_family,
        "system_prompt": system_prompt,
        "url": url,
        "organisation": organisation,
        "mail": mail,
        # "score": scores["all"]/num_questions["all"],
        # "score_level1": scores[1]/num_questions[1],
        # "score_level2": scores[2]/num_questions[2],
        # "score_level3": scores[3]/num_questions[3],
    }
    # eval_results[val_or_test] = eval_results[val_or_test].add_item(eval_entry)
    # print(eval_results)
    # eval_results.push_to_hub(RESULTS_DATASET, config_name = YEAR_VERSION, token=TOKEN)

    return format_log(
        f"Model {model} submitted by {organisation} successfully.\n"
        "Please refresh the leaderboard and allow a moment for the score to appear."
    )
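

# The sketch below is illustrative only: a minimal guess at how add_new_eval() might be
# wired into a Gradio submission form (the usual pattern for Hugging Face leaderboard
# Spaces). The widget names and layout are assumptions, not this Space's actual app code.
if __name__ == "__main__":
    import gradio as gr

    with gr.Blocks() as demo:
        split = gr.Radio(choices=["val", "test"], value="test", label="Split")
        model_name = gr.Textbox(label="Model name")
        model_family = gr.Textbox(label="Model family")
        system_prompt = gr.Textbox(label="System prompt")
        url = gr.Textbox(label="Model / paper URL")
        submission_file = gr.File(label="Submission file (.jsonl)")
        organisation = gr.Textbox(label="Organisation")
        mail = gr.Textbox(label="Contact email")
        status = gr.HTML()  # renders the HTML returned by format_log / format_warning

        submit = gr.Button("Submit")
        submit.click(
            add_new_eval,
            inputs=[split, model_name, model_family, system_prompt, url,
                    submission_file, organisation, mail],
            outputs=status,
        )

    demo.launch()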