tofu_leaderboard / uploads.py
Pratyush Maini
works
cb06555
raw
history blame
2.72 kB
from email.utils import parseaddr
from huggingface_hub import HfApi
import os
import datetime
import pandas as pd
# Hub repository that add_new_eval uploads submissions to (as a "space" repo).
RESULTS_PATH = "locuslab/tofu_leaderboard"

# Shared Hugging Face Hub client used for the uploads.
api = HfApi()

# Hub access token taken from the TOKEN environment variable; None when unset.
TOKEN = os.getenv("TOKEN")

# NOTE(review): not referenced in the visible part of this file — confirm usage.
YEAR_VERSION = "2024"
def format_error(msg):
    """Return *msg* wrapped in a centered, red, 20px HTML paragraph.

    The message is interpolated verbatim; no HTML escaping is applied.
    """
    css = "color: red; font-size: 20px; text-align: center;"
    return f"<p style='{css}'>{msg}</p>"
def format_warning(msg):
    """Return *msg* wrapped in a centered, orange, 20px HTML paragraph.

    The message is interpolated verbatim; no HTML escaping is applied.
    """
    template = "<p style='color: orange; font-size: 20px; text-align: center;'>{}</p>"
    return template.format(msg)
def format_log(msg):
    """Return *msg* wrapped in a centered, green, 20px HTML paragraph.

    The message is interpolated verbatim; no HTML escaping is applied.
    """
    opening_tag = "<p style='color: green; font-size: 20px; text-align: center;'>"
    closing_tag = "</p>"
    return f"{opening_tag}{msg}{closing_tag}"
def model_hyperlink(link, model_name):
    """Return an HTML anchor showing *model_name* that opens *link* in a new tab.

    Both values are interpolated verbatim; no escaping is applied.
    """
    anchor_style = (
        "color: var(--link-text-color); "
        "text-decoration: underline;"
        "text-decoration-style: dotted;"
    )
    return f'<a target="_blank" href="{link}" style="{anchor_style}">{model_name}</a>'
def input_verification(model, model_family, forget_rate, url, path_to_file, organisation, mail):
    """Validate the fields of a leaderboard submission.

    Checks, in order: that none of model, model_family, forget_rate, url and
    organisation is missing or empty; that *mail* parses to an address
    containing "@"; and that *path_to_file* is not None.

    Returns:
        The address extracted from *mail* by ``email.utils.parseaddr`` when
        every check passes, otherwise the HTML warning built by
        ``format_warning`` for the first check that failed.
    """
    # Compare by value: `is ""` tests object identity, which is an
    # implementation detail of string interning and triggers a SyntaxWarning.
    # None is treated as "not filled in" as well.
    required_fields = (model, model_family, forget_rate, url, organisation)
    if any(field is None or field == "" for field in required_fields):
        return format_warning("Please fill all the fields.")

    # Very basic email parsing
    _, parsed_mail = parseaddr(mail)
    if "@" not in parsed_mail:
        return format_warning("Please provide a valid email adress.")

    if path_to_file is None:
        return format_warning("Please attach a file.")

    return parsed_mail
def add_new_eval(
    model: str,
    model_family: str,
    forget_rate: str,
    url: str,
    path_to_file: str,
    organisation: str,
    mail: str,
) -> str:
    """Validate a submission, tag its CSV with metadata and upload it.

    The CSV at *path_to_file* is read with pandas, the submission metadata
    (model, model_family, forget_rate, url, organisation, mail, timestamp) is
    added as constant columns, and the result is uploaded to the
    ``RESULTS_PATH`` space under
    ``versions/<model_family>-<forget_rate>/<model>-<organisation>-<date>.csv``
    (any "%" in *forget_rate* is replaced by "p").

    Returns:
        An HTML message: the warning from ``input_verification`` when the
        inputs are invalid (nothing is uploaded in that case), otherwise a
        success message built with ``format_log``.
    """
    import io

    verification = input_verification(model, model_family, forget_rate, url, path_to_file, organisation, mail)
    # On failure input_verification hands back a format_warning(...) paragraph
    # ("<p style=...>") rather than an address; stop here instead of storing
    # that HTML as the e-mail and uploading an unvalidated submission.
    if verification.startswith("<p "):
        return verification
    parsed_mail = verification

    # load the file
    df = pd.read_csv(path_to_file)

    # One timestamp for both the column and the file name, so the two cannot
    # disagree when the call straddles midnight.
    submitted_at = datetime.datetime.now()

    # modify the df to include metadata
    df["model"] = model
    df["model_family"] = model_family
    df["forget_rate"] = forget_rate
    df["url"] = url
    df["organisation"] = organisation
    df["mail"] = parsed_mail
    df["timestamp"] = submitted_at

    # destination inside the space repository
    path_in_repo = f"versions/{model_family}-{forget_rate.replace('%', 'p')}"
    file_name = f"{model}-{organisation}-{submitted_at.strftime('%Y-%m-%d')}.csv"

    # serialise the frame in memory and upload it with the hf api
    buffer = io.BytesIO()
    df.to_csv(buffer, index=False)  # Write the DataFrame to a buffer in CSV format
    buffer.seek(0)  # Rewind the buffer to the beginning
    api.upload_file(
        repo_id=RESULTS_PATH,
        path_in_repo=f"{path_in_repo}/{file_name}",
        path_or_fileobj=buffer,
        token=TOKEN,
        repo_type="space",
    )
    return format_log(f"Model {model} submitted by {organisation} successfully. \nPlease refresh the leaderboard, and wait a bit to see the score displayed")