jbnayahu committed
Commit bc7425f · unverified · 1 Parent(s): 7689326

Signed-off-by: Jonathan Bnayahu <[email protected]>

Files changed (2)
  1. app.py +6 -5
  2. src/display/utils.py +1 -14
app.py CHANGED
@@ -30,11 +30,12 @@ LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, COLS, BENCHMARK_COLS)
 def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
+
     return Leaderboard(
         value=dataframe,
         datatype=[c.type for c in fields(AutoEvalColumn)],
         search_columns=[AutoEvalColumn.model.name],
-        interactive=False,
+        interactive=False
     )
 
 def download_csv():
@@ -42,15 +43,15 @@ def download_csv():
     LEADERBOARD_DF.to_csv(buffer, index=False)
     return buffer.getvalue()
 
-demo = gr.Blocks(css=custom_css)
-with demo:
+gui = gr.Blocks(css=custom_css)
+with gui:
     gr.HTML(TITLE_IMAGE)
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("πŸ… LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard = init_leaderboard(LEADERBOARD_DF)
+            leaderboard = init_leaderboard(LEADERBOARD_DF.style.highlight_max(color = 'lightgreen', axis=0).data)
 
         with gr.TabItem("πŸ“ About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
@@ -82,4 +83,4 @@ with demo:
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
+gui.queue(default_concurrency_limit=40).launch()
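
For context on the pandas call introduced above, here is a minimal sketch (not code from this Space) of how Styler.highlight_max and .data behave: highlight_max only registers a style rule that is applied when the Styler is rendered, while .data exposes the underlying DataFrame, so the chained expression passes the plain frame to init_leaderboard. The small DataFrame is an illustrative stand-in for LEADERBOARD_DF.

import pandas as pd

# Illustrative stand-in for LEADERBOARD_DF; not data from this Space.
df = pd.DataFrame({"Model": ["model-a", "model-b"], "Average": [0.61, 0.73]})

# highlight_max registers a style rule; nothing is computed until render time.
styler = df.style.highlight_max(color="lightgreen", axis=0, subset=["Average"])

html = styler.to_html()   # rendering applies the highlight (needs the optional Jinja2 dependency)
plain = styler.data       # the underlying DataFrame, values unchanged

print(plain.equals(df))   # True: .data is the original frame, not a styled copy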
src/display/utils.py CHANGED
@@ -25,23 +25,13 @@ auto_eval_column_dict = []
 # Init
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
 #Scores
-auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
+auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average", "number", True)])
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
-## For the queue columns in the submission tab
-@dataclass(frozen=True)
-class EvalQueueColumn: # Queue column
-    model = ColumnContent("model", "markdown", True)
-    revision = ColumnContent("revision", "str", True)
-    private = ColumnContent("private", "bool", True)
-    precision = ColumnContent("precision", "str", True)
-    weight_type = ColumnContent("weight_type", "str", "Original")
-    status = ColumnContent("status", "str", True)
-
 ## All the model information that we might need
 @dataclass
 class ModelDetails:
@@ -52,8 +42,5 @@ class ModelDetails:
 # Column selection
 COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 
-EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
-EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
-
 BENCHMARK_COLS = [t.value.col_name for t in Tasks]
 
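
A hedged sketch of the make_dataclass pattern shown in the utils.py hunk above: column specs are collected as [attribute_name, type, default ColumnContent] triples and turned into a frozen class whose attributes are those specs. ColumnContent and the fields() helper below are illustrative stand-ins, not necessarily this repo's exact definitions.

from dataclasses import dataclass, make_dataclass

# Stand-in column spec; the real ColumnContent in this repo may differ.
@dataclass(frozen=True)
class ColumnContent:
    name: str                   # display name shown in the leaderboard
    type: str                   # gradio datatype ("markdown", "number", ...)
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

# Each entry is [attribute_name, type, default]; make_dataclass stores the
# ColumnContent instance as the class attribute for that field.
auto_eval_column_dict = [
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    ["average", ColumnContent, ColumnContent("Average", "number", True)],
]
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

# Assumed helper: yield the ColumnContent instances stored on the class,
# mirroring how COLS is built from AutoEvalColumn in the diff above.
def fields(raw_class):
    return [v for k, v in raw_class.__dict__.items() if not k.startswith("__")]

COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
print(COLS)   # ['Model', 'Average']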