# Hugging Face Spaces page header (Space status: Running) - not part of the program.
import logging | |
import os | |
os.makedirs("tmp", exist_ok=True) | |
os.environ['TMP_DIR'] = "tmp" | |
import subprocess | |
import shutil | |
import glob | |
import gradio as gr | |
import numpy as np | |
from src.radial.radial import create_plot | |
from apscheduler.schedulers.background import BackgroundScheduler | |
from gradio_leaderboard import Leaderboard, SelectColumns | |
from gradio_space_ci import enable_space_ci | |
import json | |
from io import BytesIO | |
def handle_file_upload(file):
    """Parse an uploaded JSON results file.

    Args:
        file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            object that exposes the path through a ``name`` attribute.

    Returns:
        A ``(payload, file_name)`` tuple: the decoded JSON content and the
        base name of the uploaded file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Plain paths carry no usable ``name`` attribute (``Path.name`` is only
    # the base name), so resolve them directly instead of reading ``.name``.
    if isinstance(file, (str, os.PathLike)):
        source_path = os.fspath(file)
    else:
        source_path = file.name

    file_path = os.path.basename(source_path)
    logging.info("File uploaded: %s", file_path)

    # Explicit encoding: do not depend on the platform default for JSON text.
    with open(source_path, "r", encoding="utf-8") as f:
        v = json.load(f)
    return v, file_path
def submit_file(v, file_path, mn, profile: gr.OAuthProfile | None):
    """Validate an uploaded evaluation payload and push it to the results dataset.

    Args:
        v: Parsed JSON payload of the uploaded file. It must contain a
            ``results`` mapping holding an ``acc,none`` score for every
            benchmark column, and a ``config`` mapping with ``model_dtype``.
        file_path: Base name of the uploaded file. Unused; kept so the
            existing event wiring keeps working.
        mn: Model name typed by the submitter.
        profile: OAuth profile of the logged-in Hub user, or ``None`` when
            nobody is logged in.

    Returns:
        A status message for the UI: ``"Success!"`` after the upload, or a
        short explanation of why the submission was rejected.

    Raises:
        Exception: Whatever ``API.upload_file`` raises is propagated.
    """
    logging.info("START SUBMITTING!!!")
    if profile is None:
        return "Hub Login Required"
    logging.info("PROFILE: %s", profile.username)

    if not isinstance(v, dict):
        return "Please upload a JSON results file first"
    mn = (mn or "").strip()
    if not mn:
        return "Model name is required"

    columns = [
        'mmlu_translated_kk', 'kk_constitution_mc', 'kk_dastur_mc', 'kazakh_and_literature_unt_mc',
        'kk_geography_unt_mc', 'kk_world_history_unt_mc', 'kk_history_of_kazakhstan_unt_mc',
        'kk_english_unt_mc', 'kk_biology_unt_mc', 'kk_human_society_rights_unt_mc',
    ]

    try:
        # Work on a copy: flattening the scores in place would make a second
        # call with the same payload fail on ``float['acc,none']``.
        new_file = dict(v['results'])
        if profile.username == 'kz-transformers':
            new_file['model'] = mn
        else:
            new_file['model'] = profile.username + "/" + mn
        for column in columns:
            new_file[column] = new_file[column]['acc,none']
        new_file['model_dtype'] = v['config']["model_dtype"]
    except (KeyError, TypeError) as err:
        logging.warning("Rejected malformed submission: %r", err)
        return f"Invalid results file: missing or malformed field {err}"
    new_file['ppl'] = 0
    logging.info("WE READ FILE: %s", new_file)

    # A path separator in the name would create nested folders in the repo,
    # which the "model_data/external/*.json" glob used when rebuilding the
    # leaderboard never matches. Names without separators are unchanged.
    file_stem = (profile.username + mn).replace("/", "_").replace("\\", "_")

    # Pass the encoded bytes directly instead of a BytesIO whose cursor sits
    # at the end of the buffer after writing.
    API.upload_file(
        path_or_fileobj=json.dumps(new_file).encode('utf-8'),
        path_in_repo="model_data/external/" + file_stem + ".json",
        repo_id="kz-transformers/s-openbench-eval",
        repo_type="dataset",
    )
    os.environ[RESET_JUDGEMENT_ENV] = "1"
    return "Success!"
from src.display.about import ( | |
INTRODUCTION_TEXT, | |
TITLE, | |
LLM_BENCHMARKS_TEXT | |
) | |
from src.display.css_html_js import custom_css | |
from src.display.utils import ( | |
AutoEvalColumn, | |
fields, | |
) | |
from src.envs import API, H4_TOKEN, HF_HOME, REPO_ID, RESET_JUDGEMENT_ENV | |
from src.leaderboard.build_leaderboard import build_leadearboard_df, download_openbench, download_dataset | |
import huggingface_hub | |
# huggingface_hub.login(token=H4_TOKEN)
os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Start ephemeral Spaces on PRs (see config in README.md)
enable_space_ci()

# download_openbench()
def restart_space():
    """Restart the Space identified by ``REPO_ID``, then call ``download_openbench()``."""
    API.restart_space(repo_id=REPO_ID)
    # NOTE(review): indentation was lost in this copy of the file; this call
    # is assumed to belong to the function body - confirm against the repo.
    download_openbench()
def update_plot(selected_models):
    """Return the result of ``create_plot`` for the models picked in the dropdown.

    Args:
        selected_models: Current value of the model dropdown.
    """
    return create_plot(selected_models)
def build_demo():
    """Build the Gradio ``Blocks`` application.

    Calls ``download_openbench()`` and ``build_leadearboard_df()`` first, then
    lays out three tabs: the leaderboard table, the submission form and the
    analytics plot.

    NOTE(review): the nesting of rows/columns below was reconstructed from a
    copy of the file that had lost its indentation - confirm the layout.

    Returns:
        The assembled ``gr.Blocks`` instance (not yet launched).
    """
    download_openbench()
    demo = gr.Blocks(title="Kaz LLM LB", css=custom_css)
    leaderboard_df = build_leadearboard_df()

    # Computed once and reused by the table and the dropdown below.
    eval_columns = fields(AutoEvalColumn)
    model_names = leaderboard_df["model"].tolist()

    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        with gr.Tabs(elem_classes="tab-buttons"):
            with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
                Leaderboard(
                    value=leaderboard_df,
                    datatype=[c.type for c in eval_columns],
                    select_columns=SelectColumns(
                        default_selection=[c.name for c in eval_columns if c.displayed_by_default],
                        cant_deselect=[c.name for c in eval_columns if c.never_hidden or c.dummy],
                        label="Select Columns to Display:",
                    ),
                    search_columns=[
                        AutoEvalColumn.model.name,
                        # AutoEvalColumn.fullname.name,
                        # AutoEvalColumn.license.name
                    ],
                )

            # Disabled tabs, kept for reference:
            # with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=1):
            #     gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
            # with gr.TabItem("❗FAQ", elem_id="llm-benchmark-tab-table", id=2):
            #     gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")

            with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
                with gr.Row():
                    gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
                with gr.Row():
                    gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")

                with gr.Column():
                    model_name_textbox = gr.Textbox(label="Model name")
                    file_output = gr.File(label="Drag and drop JSON file judgment here", type="filepath")
                    # Per-session holders for the parsed payload and its file name.
                    uploaded_file = gr.State()
                    file_path = gr.State()

                    with gr.Row():
                        with gr.Column():
                            # NOTE(review): the text is passed positionally, i.e. as the
                            # initial value rather than as the label - confirm intent.
                            out = gr.Textbox("Статус отправки")
                        with gr.Column():
                            gr.LoginButton(elem_id="oauth-button")

                    submit_button = gr.Button("Submit File", elem_id="submit_button", variant='primary')

                    # Parse the file as soon as it is uploaded ...
                    file_output.upload(
                        handle_file_upload,
                        file_output,
                        [uploaded_file, file_path],
                    )
                    # ... and push it to the Hub when the user confirms.
                    submit_button.click(
                        submit_file,
                        [uploaded_file, file_path, model_name_textbox],
                        [out],
                    )

            with gr.TabItem("📊 Analytics", elem_id="llm-benchmark-tab-table", id=4):
                with gr.Column():
                    model_dropdown = gr.Dropdown(
                        choices=model_names,
                        label="Models",
                        value=list(model_names),
                        multiselect=True,
                        info="Select models",
                    )
                with gr.Column():
                    # Initial plot uses the dropdown's starting selection.
                    plot = gr.Plot(update_plot(model_dropdown.value))

                model_dropdown.change(
                    fn=update_plot,
                    inputs=[model_dropdown],
                    outputs=[plot],
                )

    return demo
# print(os.system('cd src/gen && ../../.venv/bin/python gen_judgment.py')) | |
# print(os.system('cd src/gen/ && python show_result.py --output')) | |
def update_board():
    """Scheduled job: rebuild and publish the leaderboard, then restart the Space.

    The rebuild steps are exactly those of ``update_board_``; this variant
    only adds the ``restart_space()`` call afterwards.
    """
    update_board_()
    restart_space()
# gen_judgement_file = os.path.join(HF_HOME, "src/gen/gen_judgement.py") | |
# subprocess.run(["python3", gen_judgement_file], check=True) | |
def update_board_():
    """Rebuild ``leaderboard.json`` from the submitted result files and upload it.

    Steps:
        1. Log and clear the ``RESET_JUDGEMENT_ENV`` flag.
        2. Call ``download_dataset`` for ``kz-transformers/s-openbench-eval``
           with ``"m_data"`` as second argument.
        3. Load every readable JSON file under ``./m_data/model_data/external/``.
        4. Write the collected entries as one list to ``genned.json`` and
           upload it as ``leaderboard.json`` to the
           ``kz-transformers/kaz-llm-lb-metainfo`` dataset.

    Unreadable or malformed submission files are skipped with a warning.
    """
    need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
    logging.info("Updating the judgement: %s", need_reset)
    # NOTE: the flag is only logged. The early return for need_reset != "1"
    # is disabled, so every call performs a full refresh.
    os.environ[RESET_JUDGEMENT_ENV] = "0"

    # Existing local data is not removed first; the rmtree cleanup of
    # ./m_data and ./data is disabled:
    # shutil.rmtree("./m_data")
    # shutil.rmtree("./data")
    download_dataset("kz-transformers/s-openbench-eval", "m_data")

    data_list = []
    # Sorted so the generated file does not depend on directory listing order.
    for path in sorted(glob.glob("./m_data/model_data/external/*.json")):
        try:
            with open(path, encoding="utf-8") as f:
                data_list.append(json.load(f))
        except (OSError, ValueError) as err:
            # Best effort: one bad submission must not block the refresh.
            # ValueError covers both JSON and text-decoding errors.
            logging.warning("Skipping unreadable submission %s: %s", path, err)
    logging.info("DATALIST: %s", data_list)

    with open("genned.json", "w", encoding="utf-8") as f:
        json.dump(data_list, f)

    API.upload_file(
        path_or_fileobj="genned.json",
        path_in_repo="leaderboard.json",
        repo_id="kz-transformers/kaz-llm-lb-metainfo",
        repo_type="dataset",
    )
if __name__ == "__main__":
    # Mark a refresh as pending, then publish the leaderboard once before
    # the UI is built.
    os.environ[RESET_JUDGEMENT_ENV] = "1"
    scheduler = BackgroundScheduler()
    update_board_()

    # Re-run the refresh (which also restarts the Space) every 10 minutes.
    scheduler.add_job(update_board, "interval", minutes=10)
    scheduler.start()

    demo_app = build_demo()
    demo_app.launch(debug=True, share=True)