hanhainebula committed
Commit 48e891a · 1 Parent(s): c1c3d39

upload code for showing submit infos table

Files changed (4):
  1. app.py +28 -51
  2. src/backend.py +60 -6
  3. src/css_html_js.py +105 -0
  4. src/envs.py +11 -3
app.py CHANGED
@@ -1,52 +1,60 @@
 import os
+import json
 import logging
+import pandas as pd
 import gradio as gr
 import multiprocessing
 
 from src.backend import pull_search_results
 from src.envs import (
     API, REPO_ID, START_COMMIT_ID,
-    LOG_DIR, LOG_FILE_PATH, HF_CACHE_DIR,
+    HF_CACHE_DIR, SUBMIT_INFOS_SAVE_PATH,
     HF_SEARCH_RESULTS_REPO_DIR, HF_EVAL_RESULTS_REPO_DIR,
     UNZIP_TARGET_DIR,
     TIME_DURATION,
     EVAL_K_VALUES,
 )
+from src.css_html_js import custom_css
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(
-    filename=LOG_FILE_PATH,
-    filemode='w',
     level=logging.WARNING,
-    datefmt='%Y-%m-%d %H:%M:%S',
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
 
 
-def restart_space():
-    API.restart_space(repo_id=REPO_ID)
+# def restart_space():
+#     API.restart_space(repo_id=REPO_ID)
 
 
-def get_log_files():
-    if not os.path.exists(LOG_DIR):
-        return []
-    return sorted([f for f in os.listdir(LOG_DIR) if f.endswith('.log')])
+def load_submit_infos_df():
+    if os.path.exists(SUBMIT_INFOS_SAVE_PATH):
+        with open(SUBMIT_INFOS_SAVE_PATH, 'r', encoding='utf-8') as f:
+            submit_infos = json.load(f)
+    else:
+        submit_infos = []
+    submit_infos_df = pd.DataFrame(submit_infos)
+    return submit_infos_df
 
 
-def refresh_log_files():
-    return get_log_files()
-
+with gr.Blocks(css=custom_css) as demo:
+    gr.Markdown("## Submission Infos Table")
+
+    table = gr.Dataframe(
+        value=load_submit_infos_df(),
+        label="Submission Infos",
+        interactive=False,
+    )
+
+    refresh_button = gr.Button("Refresh Submission Infos")
 
-def display_log_content(selected_file):
-    if selected_file:
-        with open(os.path.join(LOG_DIR, selected_file), 'r', encoding='utf-8') as file:
-            return file.read()
-    return "No log file selected"
+    refresh_button.click(
+        fn=load_submit_infos_df,
+        outputs=table,
+    )
 
 
 if __name__ == "__main__":
-    os.makedirs(LOG_DIR, exist_ok=True)
-
     process = multiprocessing.Process(
         target=pull_search_results,
         args=(
@@ -60,35 +68,4 @@ if __name__ == "__main__":
         ),
     )
     process.start()
-
-    with gr.Blocks() as demo:
-        gr.Markdown("## Select a log file to view its content")
-
-        log_file_dropdown = gr.Dropdown(
-            choices=refresh_log_files(),
-            label="Select log file",
-            interactive=True,
-        )
-        log_content_box = gr.Textbox(
-            label="Log content",
-            lines=20,
-            interactive=False,
-        )
-        log_file_list_box = gr.Textbox(
-            label="\n".join(get_log_files()),
-            lines=20,
-            interactive=False,
-        )
-        refresh_button = gr.Button("Refresh log files")
-
-        log_file_dropdown.change(
-            fn=display_log_content,
-            inputs=log_file_dropdown,
-            outputs=log_content_box,
-        )
-        refresh_button.click(
-            fn=refresh_log_files,
-            outputs=log_file_dropdown,
-        )
-
     demo.launch()
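
The new app.py only renders whatever the backend has written to SUBMIT_INFOS_SAVE_PATH. As a rough sketch of that data flow (not part of the commit; the row values below are invented, only the column names mirror get_submit_infos_list in src/backend.py):

    import json
    import pandas as pd

    # Invented example rows with the columns the backend writes to submit_infos.json.
    example_rows = [
        {
            "Rank": 1,
            "Submission Date": "2024-08-01 12:00:00",
            "Benchmark Version": "24.05",
            "Retrieval Method": '<a target="_blank" href="https://example.org/retriever">example-retriever</a>',
            "Reranking Method": "example-reranker",
            "Revision": "v1",
            "Status": "✔️ Success",
        },
    ]
    with open("submit_infos.json", "w", encoding="utf-8") as f:
        json.dump(example_rows, f, ensure_ascii=False, indent=4)

    # load_submit_infos_df() reads this kind of file and hands the DataFrame to gr.Dataframe;
    # clicking "Refresh Submission Infos" simply re-runs it and replaces the table value.
    print(pd.DataFrame(example_rows))
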
src/backend.py CHANGED
@@ -13,16 +13,14 @@ from air_benchmark.evaluation_utils.evaluator import Evaluator
 
 from src.envs import (
     API,
-    LOG_FILE_PATH, ZIP_CACHE_DIR,
-    SEARCH_RESULTS_REPO, RESULTS_REPO
+    ZIP_CACHE_DIR, SUBMIT_INFOS_SAVE_PATH,
+    SEARCH_RESULTS_REPO, RESULTS_REPO,
+    make_clickable_model
 )
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(
-    filename=LOG_FILE_PATH,
-    filemode='w',
     level=logging.WARNING,
-    datefmt='%Y-%m-%d %H:%M:%S',
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
 
@@ -111,6 +109,49 @@ def get_zip_file_path(zip_file_name: str):
     return zip_file_path
 
 
+def find_file(file_name: str, dir_path: str):
+    if not os.path.exists(dir_path) or not os.path.isdir(dir_path):
+        return False
+    for root, _, files in os.walk(dir_path):
+        for file in files:
+            if file == file_name:
+                return True
+    return False
+
+
+def get_submit_infos_list(file_paths: List[str], eval_results_dir: str) -> dict:
+    submit_infos_list = []
+    for file_path in file_paths:
+        submit_info = {
+            'Rank': None,
+            'Submission Date': None,
+            'Benchmark Version': None,
+            'Retrieval Method': None,
+            'Reranking Method': None,
+            'Revision': None,
+            'Status': None,
+        }
+        file_name = os.path.basename(file_path).split('.')[0]
+        rank_time = file_name.split('-')[0]
+        with open(file_path, 'r', encoding='utf-8') as f:
+            metadata = json.load(f)
+        submit_info['Rank'] = rank_time
+        submit_info['Submission Date'] = metadata['timestamp']
+        submit_info['Benchmark Version'] = metadata['version']
+        submit_info['Retrieval Method'] = make_clickable_model(metadata['model_name'], metadata['model_url'])
+        submit_info['Reranking Method'] = make_clickable_model(metadata['reranker_name'], metadata['reranker_url'])
+        submit_info['Revision'] = metadata['revision']
+        if find_file(f"results_{file_name}.json", eval_results_dir):
+            submit_info['Status'] = "✔️ Success"
+        else:
+            submit_info['Status'] = "❌ Failed"
+        submit_infos_list.append(submit_info)
+    sorted_submit_infos_list = sorted(submit_infos_list, key=lambda x: x['Rank'], reverse=True)
+    for rank, submit_info in enumerate(sorted_submit_infos_list, 1):
+        submit_info['Rank'] = rank
+    return sorted_submit_infos_list
+
+
 def pull_search_results(
     hf_search_results_repo_dir: str,
     hf_eval_results_repo_dir: str,
@@ -132,6 +173,13 @@ def pull_search_results(
         )
         cur_file_paths = get_file_list(hf_search_results_repo_dir, allowed_suffixes=['.json'])
     else:
+        API.snapshot_download(
+            repo_id=SEARCH_RESULTS_REPO,
+            repo_type="dataset",
+            local_dir=hf_search_results_repo_dir,
+            etag_timeout=30,
+            allow_patterns=['*.json']
+        )
         cur_file_paths = get_file_list(hf_search_results_repo_dir, allowed_suffixes=['.json'])
 
     print("Start to pull new search results ...")
@@ -280,7 +328,13 @@ def pull_search_results(
         shutil.rmtree(ZIP_CACHE_DIR)
         shutil.rmtree(unzip_target_dir)
 
+        # update submit infos
+        cur_file_paths = new_file_paths
+        submit_infos_list = get_submit_infos_list(cur_file_paths, hf_eval_results_repo_dir)
+        with open(SUBMIT_INFOS_SAVE_PATH, 'w', encoding='utf-8') as f:
+            json.dump(submit_infos_list, f, ensure_ascii=False, indent=4)
+
         # Wait for the next update
         logger.warning(f"Wait for {time_duration} seconds for the next update ...")
-        cur_file_paths = new_file_paths
+
         time.sleep(time_duration)
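
Two details of get_submit_infos_list are easy to miss: the Rank column is first filled with the timestamp-like prefix of the search-result file name (everything before the first '-'), and only after sorting newest-first is it overwritten with 1..N. A standalone sketch of just that step (the file-name prefixes are invented; the sort/renumber lines mirror the committed code):

    # Invented prefixes standing in for "<rank_time>-....json" search-result file names.
    submit_infos_list = [
        {"Rank": "20240801120000", "Retrieval Method": "model-a"},
        {"Rank": "20240902080000", "Retrieval Method": "model-b"},
    ]

    # Same sorting and renumbering as in get_submit_infos_list.
    sorted_submit_infos_list = sorted(submit_infos_list, key=lambda x: x["Rank"], reverse=True)
    for rank, submit_info in enumerate(sorted_submit_infos_list, 1):
        submit_info["Rank"] = rank

    print(sorted_submit_infos_list)
    # [{'Rank': 1, 'Retrieval Method': 'model-b'}, {'Rank': 2, 'Retrieval Method': 'model-a'}]
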
src/css_html_js.py ADDED
@@ -0,0 +1,105 @@
+custom_css = """
+
+.markdown-text {
+    font-size: 16px !important;
+}
+
+#models-to-add-text {
+    font-size: 18px !important;
+}
+
+#citation-button span {
+    font-size: 16px !important;
+}
+
+#citation-button textarea {
+    font-size: 16px !important;
+}
+
+#citation-button > label > button {
+    margin: 6px;
+    transform: scale(1.3);
+}
+
+#leaderboard-table {
+    margin-top: 15px
+}
+
+#leaderboard-table-lite {
+    margin-top: 15px
+}
+
+#search-bar-table-box > div:first-child {
+    background: none;
+    border: none;
+}
+
+#search-bar {
+    padding: 0px;
+}
+
+/* Limit the width of the first AutoEvalColumn so that names don't expand too much */
+table td:first-child,
+table th:first-child {
+    max-width: 400px;
+    overflow: auto;
+    white-space: nowrap;
+}
+
+.tab-buttons button {
+    font-size: 20px;
+}
+
+#scale-logo {
+    border-style: none !important;
+    box-shadow: none;
+    display: block;
+    margin-left: auto;
+    margin-right: auto;
+    max-width: 600px;
+}
+
+#scale-logo .download {
+    display: none;
+}
+#filter_type{
+    border: 0;
+    padding-left: 0;
+    padding-top: 0;
+}
+#filter_type label {
+    display: flex;
+}
+#filter_type label > span{
+    margin-top: var(--spacing-lg);
+    margin-right: 0.5em;
+}
+#filter_type label > .wrap{
+    width: 103px;
+}
+#filter_type label > .wrap .wrap-inner{
+    padding: 2px;
+}
+#filter_type label > .wrap .wrap-inner input{
+    width: 1px
+}
+#filter-columns-type{
+    border: 0;
+    padding: 0.5;
+}
+#filter-columns-size{
+    border: 0;
+    padding: 0.5;
+}
+#box-filter > .form{
+    border: 0
+}
+"""
+
+get_window_url_params = """
+function(url_params) {
+    const params = new URLSearchParams(window.location.search);
+    url_params = Object.fromEntries(params);
+    return url_params;
+}
+"""
src/envs.py CHANGED
@@ -1,5 +1,4 @@
 import os
-import time
 from huggingface_hub import HfApi
 
 
@@ -22,8 +21,7 @@ CACHE_PATH = os.getenv("HF_HOME", ".")
 HF_CACHE_DIR = os.path.join(CACHE_PATH, ".cache")
 ZIP_CACHE_DIR = os.path.join(CACHE_PATH, ".zip_cache")
 
-LOG_DIR = os.path.join(CACHE_PATH, "logs")
-LOG_FILE_PATH = os.path.join(LOG_DIR, f"backend_{time.strftime('%Y-%m-%d_%H-%M-%S')}.log")
+SUBMIT_INFOS_SAVE_PATH = os.path.join(CACHE_PATH, "submit_infos.json")
 
 API = HfApi(token=HF_TOKEN)
 
@@ -35,3 +33,13 @@ UNZIP_TARGET_DIR = os.path.join(CACHE_PATH, "unzip_target_dir")
 TIME_DURATION = 300 # seconds
 
 EVAL_K_VALUES = [1, 3, 5, 10, 50, 100, 1000]
+
+def model_hyperlink(link, model_name):
+    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
+
+
+def make_clickable_model(model_name: str, model_link: str):
+    # link = f"https://huggingface.co/{model_name}"
+    if not model_link or not model_link.startswith("https://"):
+        return model_name
+    return model_hyperlink(model_link, model_name)
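
make_clickable_model falls back to the plain model name whenever the submitted link is missing or is not an https URL, so the table never renders a broken anchor. A quick usage sketch (values invented):

    from src.envs import make_clickable_model

    # A well-formed https link is wrapped in an <a> tag by model_hyperlink.
    print(make_clickable_model("example-retriever", "https://example.org/retriever"))

    # Anything else (empty string, http://, plain text) is returned unchanged.
    print(make_clickable_model("example-reranker", ""))
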