IL-TUR-Leaderboard / uploads.py
abhinav-joshi's picture
add prediction submission
e1043c6
raw
history blame
5.07 kB
from email.utils import parseaddr
from huggingface_hub import HfApi
import os
import datetime
import json
import pandas as pd
import gradio as gr
from eval_utils import get_evaluation_scores
# Hub repo id that add_new_eval uploads the updated results file to
# (passed as repo_id together with repo_type="space").
LEADERBOARD_PATH = "Exploration-Lab/IL-TUR-Leaderboard"
# Selects how add_new_eval treats the uploaded JSON: "predictions" scores it
# against the gold file via get_evaluation_scores; any other value uses the
# uploaded JSON directly as the submission entry list.
SUBMISSION_FORMAT = "predictions"
# RESULTS_PATH = "Exploration-Lab/IL-TUR-Leaderboard-results"
# Access token read from the TOKEN environment variable; None when unset.
TOKEN = os.environ.get("TOKEN", None)
# NOTE(review): not referenced anywhere in the visible part of this file.
YEAR_VERSION = "2024"
# Shared Hub client, created once at import time with the token above.
api = HfApi(token=TOKEN)
def format_error(msg):
    """Return *msg* wrapped in a centered, red, 20px HTML paragraph."""
    css = "color: red; font-size: 20px; text-align: center;"
    return f"<p style='{css}'>{msg}</p>"
def format_warning(msg):
    """Return *msg* wrapped in a centered, orange, 20px HTML paragraph."""
    css = "color: orange; font-size: 20px; text-align: center;"
    return f"<p style='{css}'>{msg}</p>"
def format_log(msg):
    """Return *msg* wrapped in a centered, green, 20px HTML paragraph."""
    css = "color: green; font-size: 20px; text-align: center;"
    return f"<p style='{css}'>{msg}</p>"
def model_hyperlink(link, model_name):
    """Return an HTML anchor showing *model_name* that opens *link* in a new tab.

    Both values are interpolated as-is (no escaping is applied).
    """
    anchor_style = (
        "color: var(--link-text-color); "
        "text-decoration: underline;"
        "text-decoration-style: dotted;"
    )
    return f'<a target="_blank" href="{link}" style="{anchor_style}">{model_name}</a>'
def input_verification(method_name, url, path_to_file, organisation, mail):
    """Validate the submission form values and extract the e-mail address.

    Args:
        method_name: Text form value; must not be blank.
        url: Text form value; must not be blank.
        path_to_file: Path of the uploaded file, or None when none is attached.
        organisation: Text form value; must not be blank.
        mail: Text form value; must parse to an address containing "@".

    Returns:
        The address extracted by ``parseaddr`` when every check passes,
        otherwise an HTML warning string built by ``format_warning``.
    """
    # None and whitespace-only text count as blank, not only the empty string.
    text_fields = (method_name, url, organisation, mail)
    if any(value is None or not str(value).strip() for value in text_fields):
        return format_warning("Please fill all the fields.")
    # An empty path is a blank field; a missing (None) upload is reported
    # separately, after the e-mail check, to keep the original ordering.
    if path_to_file == "":
        return format_warning("Please fill all the fields.")

    # Very basic email parsing
    _, parsed_mail = parseaddr(mail)
    if "@" not in parsed_mail:
        return format_warning("Please provide a valid email adress.")

    if path_to_file is None:
        return format_warning("Please attach a file.")

    # The former `field not in locals()` loop compared display labels such as
    # "Method" with local variable names, so it raised gr.Error for every
    # otherwise valid submission. The blank check above already enforces the
    # required fields, so that loop is gone.
    return parsed_mail
def add_new_eval(
    method_name: str,
    submitted_by: str,
    url: str,
    path_to_file: str,
    organisation: str,
    mail: str,
):
    """Validate a submission, append its entry to the results and upload them.

    When SUBMISSION_FORMAT is "predictions" the uploaded JSON is scored against
    the gold file with ``get_evaluation_scores``; otherwise the uploaded JSON
    is used as the list of result entries directly. The first entry is appended
    to ``submissions/baseline/results.json`` and the updated list is uploaded
    to the LEADERBOARD_PATH space.

    Args:
        method_name: Method name shown in the confirmation message.
        submitted_by: Accepted for interface compatibility; not used here.
        url: Form value, only validated.
        path_to_file: Path of the uploaded JSON file.
        organisation: Organisation shown in the confirmation message.
        mail: Contact e-mail, only validated.

    Returns:
        An HTML string: the validation warning, an error message when the
        uploaded file is unusable, or the success message.
    """
    import html
    import io

    verification = input_verification(
        method_name,
        url,
        path_to_file,
        organisation,
        mail,
    )
    # input_verification returns either the bare e-mail address or an HTML
    # warning from format_warning (a "<p ...>" element). Surface the warning
    # to the user instead of carrying on with invalid input.
    if isinstance(verification, str) and verification.startswith("<p "):
        return verification

    # The uploaded file is untrusted: report unreadable or invalid JSON
    # instead of crashing. JSONDecodeError and UnicodeDecodeError are both
    # ValueError subclasses.
    try:
        with open(path_to_file, "r", encoding="utf-8") as f:
            submission_data = json.load(f)
    except (OSError, ValueError):
        return format_error("The submitted file could not be read as JSON.")

    if SUBMISSION_FORMAT == "predictions":
        # Score the uploaded predictions against the gold annotations.
        with open(
            "submissions/baseline/IL_TUR_eval_gold_small.json",
            "r",
            encoding="utf-8",
        ) as f:
            gold_data = json.load(f)
        submission = get_evaluation_scores(gold_data, submission_data)
    else:
        # The upload already holds the result entries.
        submission = submission_data

    # Only the first entry of the submission is added to the leaderboard.
    try:
        new_entry = submission[0]
    except (IndexError, KeyError, TypeError):
        return format_error("The submission does not contain any result entry.")

    with open("submissions/baseline/results.json", "r", encoding="utf-8") as f:
        results = json.load(f)
    results.append(new_entry)

    # Serialise in memory; a BytesIO built from bytes starts at position 0.
    leaderboard_buffer = io.BytesIO(json.dumps(results).encode())
    api.upload_file(
        repo_id=LEADERBOARD_PATH,
        path_in_repo="submissions/baseline/results.json",
        path_or_fileobj=leaderboard_buffer,
        token=TOKEN,
        repo_type="space",
    )

    # User-supplied names go into HTML, so escape them.
    safe_method = html.escape(method_name)
    safe_organisation = html.escape(organisation)
    return format_log(
        f"Method {safe_method} submitted by {safe_organisation} successfully. \nPlease refresh the leaderboard, and wait a bit to see the score displayed"
    )