Paul Hager committed
Commit d614158 · 1 Parent(s): 170ba5c

Added second leaderboard

Files changed (3)
  1. app.py +22 -99
  2. src/about.py +4 -11
  3. src/envs.py +4 -2
app.py CHANGED
@@ -23,7 +23,7 @@ from src.display.utils import (
     WeightType,
     Precision,
 )
-from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
+from src.envs import API, EVAL_RESULTS_PATH_CDM, EVAL_RESULTS_PATH_CDM_FI, REPO_ID, RESULTS_REPO, TOKEN
 from src.populate import get_leaderboard_df


@@ -33,10 +33,10 @@ def restart_space():

 ### Space initialisation
 try:
-    print(EVAL_RESULTS_PATH)
+    print(EVAL_RESULTS_PATH_CDM)
     snapshot_download(
         repo_id=RESULTS_REPO,
-        local_dir=EVAL_RESULTS_PATH,
+        local_dir=EVAL_RESULTS_PATH_CDM,
         repo_type="dataset",
         tqdm_class=None,
         etag_timeout=30,
@@ -45,15 +45,22 @@ try:
 except Exception:
     restart_space()

+try:
+    print(EVAL_RESULTS_PATH_CDM_FI)
+    snapshot_download(
+        repo_id=RESULTS_REPO,
+        local_dir=EVAL_RESULTS_PATH_CDM_FI,
+        repo_type="dataset",
+        tqdm_class=None,
+        etag_timeout=30,
+        token=TOKEN,
+    )
+except Exception:
+    restart_space()

-LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, COLS, BENCHMARK_COLS)
-
-# (
-#     finished_eval_queue_df,
-#     running_eval_queue_df,
-#     pending_eval_queue_df,
-# ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+LEADERBOARD_DF_CDM = get_leaderboard_df(EVAL_RESULTS_PATH_CDM, COLS, BENCHMARK_COLS)
+LEADERBOARD_DF_CDM_FI = get_leaderboard_df(EVAL_RESULTS_PATH_CDM_FI, COLS, BENCHMARK_COLS)

 def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:
@@ -91,99 +98,15 @@ with demo:
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard = init_leaderboard(LEADERBOARD_DF)
+        with gr.TabItem("MIMIC CDM", elem_id="llm-benchmark-tab-table", id=0):
+            leaderboard = init_leaderboard(LEADERBOARD_DF_CDM)
+
+        with gr.TabItem("MIMIC CDM FI", elem_id="llm-benchmark-tab-table", id=0):
+            leaderboard = init_leaderboard(LEADERBOARD_DF_CDM_FI)

         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

-        # with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-        #     with gr.Column():
-        #         with gr.Row():
-        #             gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-        #     with gr.Column():
-        #         with gr.Accordion(
-        #             f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-        #             open=False,
-        #         ):
-        #             with gr.Row():
-        #                 finished_eval_table = gr.components.Dataframe(
-        #                     value=finished_eval_queue_df,
-        #                     headers=EVAL_COLS,
-        #                     datatype=EVAL_TYPES,
-        #                     row_count=5,
-        #                 )
-        #         with gr.Accordion(
-        #             f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-        #             open=False,
-        #         ):
-        #             with gr.Row():
-        #                 running_eval_table = gr.components.Dataframe(
-        #                     value=running_eval_queue_df,
-        #                     headers=EVAL_COLS,
-        #                     datatype=EVAL_TYPES,
-        #                     row_count=5,
-        #                 )
-
-        #         with gr.Accordion(
-        #             f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-        #             open=False,
-        #         ):
-        #             with gr.Row():
-        #                 pending_eval_table = gr.components.Dataframe(
-        #                     value=pending_eval_queue_df,
-        #                     headers=EVAL_COLS,
-        #                     datatype=EVAL_TYPES,
-        #                     row_count=5,
-        #                 )
-        #     with gr.Row():
-        #         gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-
-        #     with gr.Row():
-        #         with gr.Column():
-        #             model_name_textbox = gr.Textbox(label="Model name")
-        #             revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-        #             model_type = gr.Dropdown(
-        #                 choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-        #                 label="Model type",
-        #                 multiselect=False,
-        #                 value=None,
-        #                 interactive=True,
-        #             )
-
-        #         with gr.Column():
-        #             precision = gr.Dropdown(
-        #                 choices=[i.value.name for i in Precision if i != Precision.Unknown],
-        #                 label="Precision",
-        #                 multiselect=False,
-        #                 value="float16",
-        #                 interactive=True,
-        #             )
-        #             weight_type = gr.Dropdown(
-        #                 choices=[i.value.name for i in WeightType],
-        #                 label="Weights type",
-        #                 multiselect=False,
-        #                 value="Original",
-        #                 interactive=True,
-        #             )
-        #             base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-
-        #     submit_button = gr.Button("Submit Eval")
-        #     submission_result = gr.Markdown()
-        #     submit_button.click(
-        #         add_new_eval,
-        #         [
-        #             model_name_textbox,
-        #             base_model_name_textbox,
-        #             revision_name_textbox,
-        #             precision,
-        #             weight_type,
-        #             model_type,
-        #         ],
-        #         submission_result,
-        #     )
-
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):
             citation_button = gr.Textbox(
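Read together, the app.py changes amount to: download two results datasets instead of one, build one dataframe per dataset, and render one leaderboard tab per dataframe. The sketch below condenses that flow; the download_results() helper is illustrative only (the commit keeps two separate try/except blocks), it assumes the rest of app.py (restart_space, init_leaderboard, get_leaderboard_df, COLS, BENCHMARK_COLS, TOKEN, gr, the demo Blocks context), and it passes the per-leaderboard repo constants from src/envs.py even though the committed app.py still passes the old RESULTS_REPO name to both downloads.

    from huggingface_hub import snapshot_download

    def download_results(repo_id, local_dir):
        # Same behaviour as the duplicated blocks in app.py: fetch the results
        # dataset into a local cache dir, restart the Space if that fails.
        try:
            snapshot_download(
                repo_id=repo_id,
                local_dir=local_dir,
                repo_type="dataset",
                etag_timeout=30,
                token=TOKEN,
            )
        except Exception:
            restart_space()

    download_results(RESULTS_REPO_CDM, EVAL_RESULTS_PATH_CDM)
    download_results(RESULTS_REPO_CDM_FI, EVAL_RESULTS_PATH_CDM_FI)

    LEADERBOARD_DF_CDM = get_leaderboard_df(EVAL_RESULTS_PATH_CDM, COLS, BENCHMARK_COLS)
    LEADERBOARD_DF_CDM_FI = get_leaderboard_df(EVAL_RESULTS_PATH_CDM_FI, COLS, BENCHMARK_COLS)

    # Inside the `with demo:` block:
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("MIMIC CDM", elem_id="llm-benchmark-tab-table", id=0):
            leaderboard_cdm = init_leaderboard(LEADERBOARD_DF_CDM)
        with gr.TabItem("MIMIC CDM FI", elem_id="llm-benchmark-tab-table", id=1):
            leaderboard_cdm_fi = init_leaderboard(LEADERBOARD_DF_CDM_FI)

Note that the committed diff gives both TabItems id=0; distinct ids, as in the sketch, avoid ambiguity if the tabs are ever selected programmatically.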
src/about.py CHANGED
@@ -13,17 +13,10 @@ class Task:
 # ---------------------------------------------------
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task0 = Task("MIMIC_CDM_Appendicitis", "acc", "CDM App")
-    task1 = Task("MIMIC_CDM_Cholecystitis", "acc", "CDM Cholec")
-    task2 = Task("MIMIC_CDM_Diverticulitis", "acc", "CDM Divert")
-    task3 = Task("MIMIC_CDM_Pancreatitis", "acc", "CDM Pancr")
-    task4 = Task("MIMIC_CDM_Mean", "acc", "CDM Mean")
-
-    task5 = Task("MIMIC_CDM_FI_Appendicitis", "acc", "CDM FI App")
-    task6 = Task("MIMIC_CDM_FI_Cholecystitis", "acc", "CDM FI Cholec")
-    task7 = Task("MIMIC_CDM_FI_Diverticulitis", "acc", "CDM FI Divert")
-    task8 = Task("MIMIC_CDM_FI_Pancreatitis", "acc", "CDM FI Pancr")
-    task9 = Task("MIMIC_CDM_FI_Mean", "acc", "CDM FI Mean")
+    task0 = Task("Appendicitis", "acc", "Appendicitis")
+    task1 = Task("Cholecystitis", "acc", "Cholecystitis")
+    task2 = Task("Diverticulitis", "acc", "Diverticulitis")
+    task3 = Task("Pancreatitis", "acc", "Pancreatitis")


 NUM_FEWSHOT = 0  # Change with your few shot
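For reference, the Task entries above plug into the Task dataclass declared just before this hunk (visible in the hunk header). A minimal sketch of how src/about.py presumably reads after the commit, assuming the stock leaderboard template's dataclass:

    from dataclasses import dataclass
    from enum import Enum

    @dataclass
    class Task:
        benchmark: str   # task_key in the results json
        metric: str      # metric_key in the results json
        col_name: str    # column name shown in the leaderboard

    class Tasks(Enum):
        # With the CDM / CDM FI split moved into two results repos, each
        # leaderboard now lists only the four diagnoses.
        task0 = Task("Appendicitis", "acc", "Appendicitis")
        task1 = Task("Cholecystitis", "acc", "Cholecystitis")
        task2 = Task("Diverticulitis", "acc", "Diverticulitis")
        task3 = Task("Pancreatitis", "acc", "Pancreatitis")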
src/envs.py CHANGED
@@ -13,14 +13,16 @@ OWNER = (

 REPO_ID = f"{OWNER}/leaderboard"
 # QUEUE_REPO = f"{OWNER}/requests"
-RESULTS_REPO = f"{OWNER}/results"
+RESULTS_REPO_CDM = f"{OWNER}/results-CDM"
+RESULTS_REPO_CDM_FI = f"{OWNER}/results-CDM-FI"

 # If you setup a cache later, just change HF_HOME
 CACHE_PATH = os.getenv("HF_HOME", ".")

 # Local caches
 # EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
-EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
+EVAL_RESULTS_PATH_CDM = os.path.join(CACHE_PATH, "eval-results-CDM")
+EVAL_RESULTS_PATH_CDM_FI = os.path.join(CACHE_PATH, "eval-results-CDM-FI")
 # EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
 EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")
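The src/envs.py constants now come in CDM / CDM FI pairs, one results repo and one local cache directory per leaderboard. A small illustrative grouping (the RESULTS_SOURCES dict is not part of the commit; OWNER is defined earlier in the file and shown here only as a placeholder):

    import os

    OWNER = "..."  # set near the top of src/envs.py; placeholder value here
    CACHE_PATH = os.getenv("HF_HOME", ".")

    # One (results repo, local cache dir) pair per leaderboard tab.
    RESULTS_SOURCES = {
        "MIMIC CDM": (f"{OWNER}/results-CDM", os.path.join(CACHE_PATH, "eval-results-CDM")),
        "MIMIC CDM FI": (f"{OWNER}/results-CDM-FI", os.path.join(CACHE_PATH, "eval-results-CDM-FI")),
    }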