import gradio as gr
import pandas as pd
import json
from apscheduler.schedulers.background import BackgroundScheduler

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    AutoEvalColumn,
    fields
)
from src.envs import API, REPO_ID


def restart_space():
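    """Restart the Space through the Hub API."""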
    API.restart_space(repo_id=REPO_ID)

def init_leaderboard(data_file):
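    """Build the leaderboard table component from a JSON file of per-benchmark Pass@1 results.

    Assumed input layout, inferred from the merge below (the benchmark names
    here are illustrative, not the real keys):

        {
            "benchmark_a": [
                {"Context": "...", "Method": "...", "Model": "...", "Pass_at_1": 0.42},
                ...
            ],
            "benchmark_b": [...]
        }
    """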
    with open(data_file, "r") as fp:
        data = json.load(fp)

    # Merge each benchmark's Pass@1 column into one wide table keyed on
    # (Context, Method, Model).
    dataframe = pd.DataFrame()
    for key, value in data.items():
        col_df = pd.DataFrame(value)
        col_df.rename(columns={"Pass_at_1": key}, inplace=True)
        dataframe = col_df if dataframe.empty else dataframe.merge(
            col_df, on=["Context", "Method", "Model"], how="outer"
        )

    # Average the benchmark columns into an overall Score. The denominator
    # assumes exactly five benchmarks per file; rows missing a benchmark after
    # the outer merge contribute 0 for it (sum skips NaN).
    dataframe["Score"] = dataframe.drop(columns=["Context", "Method", "Model"]).sum(axis=1) / 5

    # Report all metrics as percentages rounded to one decimal place.
    numeric_cols = dataframe.select_dtypes(include="number").columns
    dataframe[numeric_cols] = (dataframe[numeric_cols] * 100).round(1)

    # Move Score next to the identity columns and rank rows by it.
    cols = list(dataframe.columns)
    cols.remove("Score")
    cols.insert(3, "Score")
    dataframe = dataframe[cols]

    dataframe = dataframe.sort_values(by="Score", ascending=False)

    return gr.components.DataFrame(
        value=dataframe,
        headers=[c.name for c in fields(AutoEvalColumn) if not c.hidden],
        datatype=[c.type for c in fields(AutoEvalColumn) if not c.hidden],
        interactive=False,
    )

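# Assemble the UI: one results tab per evaluation setting, plus submission
# instructions and a citation box.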
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.HTML(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("[Method] Evaluation", elem_id="llm-benchmark-tab-table", id=0):
            leaderboard = init_leaderboard("./data/data_method.json")

        with gr.TabItem("[Context] Evaluation", elem_id="llm-benchmark-tab-table", id=1):
            leaderboard = init_leaderboard("./data/data_context.json")

        with gr.TabItem("[Incremental] Evaluation", elem_id="llm-benchmark-tab-table", id=2):
            leaderboard = init_leaderboard("./data/data_incr-order.json")

        # with gr.TabItem("πŸ“ About", elem_id="llm-benchmark-tab-table", id=2):
        #     gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("πŸš€ Submission", elem_id="llm-benchmark-tab-table", id=3):
            with gr.Column():
                with gr.Row():
                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("πŸ“™ Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )

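# Restart the Space every 30 minutes, presumably so the leaderboard picks up
# freshly pushed data files.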
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()
demo.queue(default_concurrency_limit=40).launch()