Spaces:
Running
Running
michal
commited on
Commit
·
c5afbf5
1
Parent(s):
64298d7
Upload
Browse files- app.py +120 -0
- leaderboards/all_types_years.json +0 -0
- leaderboards/llmzszl.json +410 -0
- src/abouts.py +90 -0
- src/envs.py +25 -0
- src/images/logo.png +0 -0
- src/structures/gim.py +42 -0
- src/structures/leaderboard_structure.py +53 -0
- src/structures/mat.py +42 -0
- src/structures/osm.py +42 -0
- src/structures/zaw.py +42 -0
- src/styles.py +108 -0
- src/utils.py +1 -0
app.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from gradio_leaderboard import Leaderboard
|
3 |
+
from pathlib import Path
|
4 |
+
import pandas as pd
|
5 |
+
from src.styles import custom_css
|
6 |
+
|
7 |
+
from src.structures.leaderboard_structure import (LB_LLMZSZL,
|
8 |
+
ORDER_LIST,
|
9 |
+
DATA_TYPES,
|
10 |
+
COLUMN_HEADERS,
|
11 |
+
filter_data,
|
12 |
+
filter_columns,
|
13 |
+
)
|
14 |
+
from src.structures.gim import GIM_SCORES
|
15 |
+
from src.structures.zaw import ZAW_SCORES
|
16 |
+
from src.structures.mat import MAT_SCORES
|
17 |
+
from src.structures.osm import OSM_SCORES
|
18 |
+
|
19 |
+
# Only the text constants this file actually renders are imported, so the
# origin of every name is visible here. (The former module-level
# `global data_component` statement had no effect at module scope.)
from src.abouts import (
    ABOUT,
    GIM_DESC,
    HEADER_TITLE,
    MAT_DESC,
    OSM_DESC,
    ZAW_DESC,
)
|
21 |
+
|
22 |
+
main = gr.Blocks(css=custom_css)
|
23 |
+
with main:
|
24 |
+
with gr.Row():
|
25 |
+
with gr.Column():
|
26 |
+
image = gr.Image("src/images/logo.png",
|
27 |
+
show_download_button=False,
|
28 |
+
show_share_button=False,
|
29 |
+
show_fullscreen_button=False,
|
30 |
+
container=False)
|
31 |
+
with gr.Column():
|
32 |
+
gr.HTML(HEADER_TITLE)
|
33 |
+
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
34 |
+
with gr.Tab("🏅 LLMZSZL"):
|
35 |
+
gr.Markdown("""## Overall scores""")
|
36 |
+
# Checkbox to toggle column visibility
|
37 |
+
columns_selector = gr.CheckboxGroup(
|
38 |
+
choices=ORDER_LIST,
|
39 |
+
label="Select columns to display",
|
40 |
+
value=ORDER_LIST,
|
41 |
+
)
|
42 |
+
# Dataframe component to display the leaderboard data
|
43 |
+
data_component = gr.components.Dataframe(
|
44 |
+
value=LB_LLMZSZL,
|
45 |
+
headers=COLUMN_HEADERS,
|
46 |
+
type="pandas",
|
47 |
+
datatype=DATA_TYPES,
|
48 |
+
interactive=False,
|
49 |
+
visible=True,
|
50 |
+
column_widths=[400, 200, 100, 120, 100]
|
51 |
+
)
|
52 |
+
# def update_data(selected_columns, selected_languages):
|
53 |
+
# return update_dataframe(selected_columns, selected_languages)
|
54 |
+
|
55 |
+
def update_dataframe(selected_columns):
|
56 |
+
return filter_columns(selected_columns)
|
57 |
+
|
58 |
+
columns_selector.change(update_dataframe, inputs=columns_selector, outputs=data_component)
|
59 |
+
# language_selector.change(update_data, inputs=[columns_selector, language_selector], outputs=data_component)
|
60 |
+
|
61 |
+
|
62 |
+
with gr.Tab("📝 Middle School exam"):
|
63 |
+
gr.Markdown(GIM_DESC)
|
64 |
+
data_component = gr.components.Dataframe(
|
65 |
+
value=GIM_SCORES,
|
66 |
+
type="pandas",
|
67 |
+
interactive=False,
|
68 |
+
visible=True,
|
69 |
+
datatype=["markdown"]+["number"]*18,
|
70 |
+
column_widths=[400] + [80] * 18
|
71 |
+
)
|
72 |
+
|
73 |
+
with gr.Tab("📝 8-grade exam"):
|
74 |
+
gr.Markdown(OSM_DESC)
|
75 |
+
data_component = gr.components.Dataframe(
|
76 |
+
value=OSM_SCORES,
|
77 |
+
type="pandas",
|
78 |
+
interactive=False,
|
79 |
+
visible=True,
|
80 |
+
datatype=["markdown"]+["number"]*5,
|
81 |
+
column_widths=[400] + [80] * 5
|
82 |
+
)
|
83 |
+
|
84 |
+
with gr.Tab("📝 High School exam"):
|
85 |
+
gr.Markdown(MAT_DESC)
|
86 |
+
data_component = gr.components.Dataframe(
|
87 |
+
value=MAT_SCORES,
|
88 |
+
type="pandas",
|
89 |
+
interactive=False,
|
90 |
+
visible=True,
|
91 |
+
datatype=["markdown"]+["number"]*22,
|
92 |
+
column_widths=[400] + [80] * 22
|
93 |
+
)
|
94 |
+
|
95 |
+
with gr.Tab("📝 Professional exam"):
|
96 |
+
gr.Markdown(ZAW_DESC)
|
97 |
+
data_component = gr.components.Dataframe(
|
98 |
+
value=ZAW_SCORES,
|
99 |
+
type="pandas",
|
100 |
+
interactive=False,
|
101 |
+
visible=True,
|
102 |
+
datatype=["markdown"]+["number"]*12,
|
103 |
+
column_widths=[400] + [80] * 12
|
104 |
+
)
|
105 |
+
|
106 |
+
with gr.Tab("📝 About"):
|
107 |
+
gr.Markdown(ABOUT)
|
108 |
+
|
109 |
+
with gr.Column():
|
110 |
+
with gr.Accordion("📙 Citation", open=False):
|
111 |
+
citation_button = gr.Textbox(
|
112 |
+
value="TEST",
|
113 |
+
label="TEST_LABEL",
|
114 |
+
lines=20,
|
115 |
+
elem_id="citation-button",
|
116 |
+
show_copy_button=True,
|
117 |
+
)
|
118 |
+
|
119 |
+
if __name__ == "__main__":
|
120 |
+
main.launch()
|
leaderboards/all_types_years.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
leaderboards/llmzszl.json
ADDED
@@ -0,0 +1,410 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"Lang": "E",
|
4 |
+
"Family": "Llama",
|
5 |
+
"Name": "meta-llama/Llama-2-7b-hf",
|
6 |
+
"Parameters (B)": 7,
|
7 |
+
"Date": "23-07",
|
8 |
+
"Score": 28.04
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"Lang": "E",
|
12 |
+
"Family": "Llama",
|
13 |
+
"Name": "meta-llama/Llama-2-13b-hf",
|
14 |
+
"Parameters (B)": 13,
|
15 |
+
"Date": "23-07",
|
16 |
+
"Score": 33.85
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"Lang": "E",
|
20 |
+
"Family": "Llama",
|
21 |
+
"Name": "meta-llama/Llama-2-70b-hf",
|
22 |
+
"Parameters (B)": 70,
|
23 |
+
"Date": "23-07",
|
24 |
+
"Score": 46.38
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"Lang": "E",
|
28 |
+
"Family": "Phi",
|
29 |
+
"Name": "microsoft/phi-1",
|
30 |
+
"Parameters (B)": 1,
|
31 |
+
"Date": "24-04",
|
32 |
+
"Score": 25.73
|
33 |
+
},
|
34 |
+
{
|
35 |
+
"Lang": "E",
|
36 |
+
"Family": "Phi",
|
37 |
+
"Name": "microsoft/phi-1_5",
|
38 |
+
"Parameters (B)": 1,
|
39 |
+
"Date": "24-04",
|
40 |
+
"Score": 24.25
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"Lang": "E",
|
44 |
+
"Family": "Phi",
|
45 |
+
"Name": "microsoft/phi-2",
|
46 |
+
"Parameters (B)": 3,
|
47 |
+
"Date": "24-01",
|
48 |
+
"Score": 25.6
|
49 |
+
},
|
50 |
+
{
|
51 |
+
"Lang": "E",
|
52 |
+
"Family": "Phi",
|
53 |
+
"Name": "microsoft/Phi-3-mini-4k-instruct",
|
54 |
+
"Parameters (B)": 4,
|
55 |
+
"Date": "24-07",
|
56 |
+
"Score": 33.44
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"Lang": "E",
|
60 |
+
"Family": "Qwen",
|
61 |
+
"Name": "Qwen/Qwen2-1.5B",
|
62 |
+
"Parameters (B)": 1.5,
|
63 |
+
"Date": "24-05",
|
64 |
+
"Score": 34.19
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"Lang": "E",
|
68 |
+
"Family": "Qwen",
|
69 |
+
"Name": "Qwen/Qwen2-7B",
|
70 |
+
"Parameters (B)": 7,
|
71 |
+
"Date": "24-06",
|
72 |
+
"Score": 45.59
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"Lang": "E",
|
76 |
+
"Family": "gemma",
|
77 |
+
"Name": "google/gemma-7b",
|
78 |
+
"Parameters (B)": 7,
|
79 |
+
"Date": "24-02",
|
80 |
+
"Score": 46.84
|
81 |
+
},
|
82 |
+
{
|
83 |
+
"Lang": "P",
|
84 |
+
"Family": "Bielik",
|
85 |
+
"Name": "speakleash/Bielik-7B-v0.1",
|
86 |
+
"Parameters (B)": 7,
|
87 |
+
"Date": "24-03",
|
88 |
+
"Score": 39.15
|
89 |
+
},
|
90 |
+
{
|
91 |
+
"Lang": "P",
|
92 |
+
"Family": "Bielik",
|
93 |
+
"Name": "speakleash/Bielik-7B-Instruct-v0.1",
|
94 |
+
"Parameters (B)": 7,
|
95 |
+
"Date": "24-03",
|
96 |
+
"Score": 40.77
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"Lang": "P",
|
100 |
+
"Family": "Bielik",
|
101 |
+
"Name": "speakleash/Bielik-11B-v2",
|
102 |
+
"Parameters (B)": 11,
|
103 |
+
"Date": "24-08",
|
104 |
+
"Score": 55.14
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"Lang": "P",
|
108 |
+
"Family": "Bielik",
|
109 |
+
"Name": "speakleash/Bielik-11B-v2.0-Instruct",
|
110 |
+
"Parameters (B)": 11,
|
111 |
+
"Date": "24-08",
|
112 |
+
"Score": 55.61
|
113 |
+
},
|
114 |
+
{
|
115 |
+
"Lang": "P",
|
116 |
+
"Family": "Bielik",
|
117 |
+
"Name": "speakleash/Bielik-11B-v2.1-Instruct",
|
118 |
+
"Parameters (B)": 11,
|
119 |
+
"Date": "24-08",
|
120 |
+
"Score": 57.52
|
121 |
+
},
|
122 |
+
{
|
123 |
+
"Lang": "P",
|
124 |
+
"Family": "Bielik",
|
125 |
+
"Name": "speakleash/Bielik-11B-v2.2-Instruct",
|
126 |
+
"Parameters (B)": 11,
|
127 |
+
"Date": "24-08",
|
128 |
+
"Score": 57.36
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"Lang": "P",
|
132 |
+
"Family": "Qra",
|
133 |
+
"Name": "OPI-PG/Qra-1b",
|
134 |
+
"Parameters (B)": 1,
|
135 |
+
"Date": "24-02",
|
136 |
+
"Score": 25.47
|
137 |
+
},
|
138 |
+
{
|
139 |
+
"Lang": "P",
|
140 |
+
"Family": "Qra",
|
141 |
+
"Name": "OPI-PG/Qra-7b",
|
142 |
+
"Parameters (B)": 7,
|
143 |
+
"Date": "24-02",
|
144 |
+
"Score": 29.07
|
145 |
+
},
|
146 |
+
{
|
147 |
+
"Lang": "P",
|
148 |
+
"Family": "Qra",
|
149 |
+
"Name": "OPI-PG/Qra-13b",
|
150 |
+
"Parameters (B)": 13,
|
151 |
+
"Date": "24-02",
|
152 |
+
"Score": 34.85
|
153 |
+
},
|
154 |
+
{
|
155 |
+
"Lang": "P",
|
156 |
+
"Family": "polish-gpt2",
|
157 |
+
"Name": "sdadas/polish-gpt2-small",
|
158 |
+
"Parameters (B)": 0.2,
|
159 |
+
"Date": "22-09",
|
160 |
+
"Score": 24.19
|
161 |
+
},
|
162 |
+
{
|
163 |
+
"Lang": "P",
|
164 |
+
"Family": "polish-gpt2",
|
165 |
+
"Name": "sdadas/polish-gpt2-medium",
|
166 |
+
"Parameters (B)": 0.5,
|
167 |
+
"Date": "22-09",
|
168 |
+
"Score": 24.4
|
169 |
+
},
|
170 |
+
{
|
171 |
+
"Lang": "P",
|
172 |
+
"Family": "polish-gpt2",
|
173 |
+
"Name": "sdadas/polish-gpt2-large",
|
174 |
+
"Parameters (B)": 0.9,
|
175 |
+
"Date": "23-01",
|
176 |
+
"Score": 24.89
|
177 |
+
},
|
178 |
+
{
|
179 |
+
"Lang": "P",
|
180 |
+
"Family": "polish-gpt2",
|
181 |
+
"Name": "sdadas/polish-gpt2-xl",
|
182 |
+
"Parameters (B)": 2,
|
183 |
+
"Date": "23-01",
|
184 |
+
"Score": 23.98
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"Lang": "P",
|
188 |
+
"Family": "trurl",
|
189 |
+
"Name": "Voicelab/trurl-2-7b-8bit",
|
190 |
+
"Parameters (B)": 7,
|
191 |
+
"Date": "23-08",
|
192 |
+
"Score": 31.86
|
193 |
+
},
|
194 |
+
{
|
195 |
+
"Lang": "P",
|
196 |
+
"Family": "trurl",
|
197 |
+
"Name": "Voicelab/trurl-2-7b",
|
198 |
+
"Parameters (B)": 7,
|
199 |
+
"Date": "23-08",
|
200 |
+
"Score": 32.3
|
201 |
+
},
|
202 |
+
{
|
203 |
+
"Lang": "P",
|
204 |
+
"Family": "trurl",
|
205 |
+
"Name": "Voicelab/trurl-2-13b",
|
206 |
+
"Parameters (B)": 13,
|
207 |
+
"Date": "23-08",
|
208 |
+
"Score": 40.22
|
209 |
+
},
|
210 |
+
{
|
211 |
+
"Lang": "P",
|
212 |
+
"Family": "trurl",
|
213 |
+
"Name": "Voicelab/trurl-2-13b-8bit",
|
214 |
+
"Parameters (B)": 13,
|
215 |
+
"Date": "23-08",
|
216 |
+
"Score": 40.23
|
217 |
+
},
|
218 |
+
{
|
219 |
+
"Lang": "P",
|
220 |
+
"Family": "trurl",
|
221 |
+
"Name": "Voicelab/trurl-2-13b-academic",
|
222 |
+
"Parameters (B)": 13,
|
223 |
+
"Date": "23-08",
|
224 |
+
"Score": 34.89
|
225 |
+
},
|
226 |
+
{
|
227 |
+
"Lang": "m",
|
228 |
+
"Family": "Llama",
|
229 |
+
"Name": "meta-llama/Meta-Llama-3-8B",
|
230 |
+
"Parameters (B)": 8,
|
231 |
+
"Date": "24-04",
|
232 |
+
"Score": 41.38
|
233 |
+
},
|
234 |
+
{
|
235 |
+
"Lang": "m",
|
236 |
+
"Family": "Llama",
|
237 |
+
"Name": "meta-llama/Meta-Llama-3-8B-Instruct",
|
238 |
+
"Parameters (B)": 8,
|
239 |
+
"Date": "24-04",
|
240 |
+
"Score": 44.83
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"Lang": "m",
|
244 |
+
"Family": "Llama",
|
245 |
+
"Name": "meta-llama/Meta-Llama-3-70B",
|
246 |
+
"Parameters (B)": 70,
|
247 |
+
"Date": "24-04",
|
248 |
+
"Score": 62.22
|
249 |
+
},
|
250 |
+
{
|
251 |
+
"Lang": "m",
|
252 |
+
"Family": "Llama",
|
253 |
+
"Name": "meta-llama/Meta-Llama-3-70B-Instruct",
|
254 |
+
"Parameters (B)": 70,
|
255 |
+
"Date": "24-04",
|
256 |
+
"Score": 64.04
|
257 |
+
},
|
258 |
+
{
|
259 |
+
"Lang": "m",
|
260 |
+
"Family": "Llama",
|
261 |
+
"Name": "meta-llama/Meta-Llama-3.1-8B",
|
262 |
+
"Parameters (B)": 8,
|
263 |
+
"Date": "24-07",
|
264 |
+
"Score": 44.21
|
265 |
+
},
|
266 |
+
{
|
267 |
+
"Lang": "m",
|
268 |
+
"Family": "Llama",
|
269 |
+
"Name": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
270 |
+
"Parameters (B)": 8,
|
271 |
+
"Date": "24-07",
|
272 |
+
"Score": 47.41
|
273 |
+
},
|
274 |
+
{
|
275 |
+
"Lang": "m",
|
276 |
+
"Family": "Llama",
|
277 |
+
"Name": "meta-llama/Meta-Llama-3.1-70B-Instruct",
|
278 |
+
"Parameters (B)": 70,
|
279 |
+
"Date": "24-07",
|
280 |
+
"Score": 66.59
|
281 |
+
},
|
282 |
+
{
|
283 |
+
"Lang": "m",
|
284 |
+
"Family": "Mistral",
|
285 |
+
"Name": "mistralai/Mistral-7B-v0.1",
|
286 |
+
"Parameters (B)": 7,
|
287 |
+
"Date": "23-12",
|
288 |
+
"Score": 37.75
|
289 |
+
},
|
290 |
+
{
|
291 |
+
"Lang": "m",
|
292 |
+
"Family": "Mistral",
|
293 |
+
"Name": "mistralai/Mixtral-8x7B-Instruct-v0.1",
|
294 |
+
"Parameters (B)": 7,
|
295 |
+
"Date": "23-12",
|
296 |
+
"Score": 49.46
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"Lang": "m",
|
300 |
+
"Family": "Mistral",
|
301 |
+
"Name": "mistralai/Mixtral-8x22B-Instruct-v0.1",
|
302 |
+
"Parameters (B)": 141,
|
303 |
+
"Date": "24-04",
|
304 |
+
"Score": 58.17
|
305 |
+
},
|
306 |
+
{
|
307 |
+
"Lang": "m",
|
308 |
+
"Family": "Mistral",
|
309 |
+
"Name": "mistralai/Mistral-7B-Instruct-v0.1",
|
310 |
+
"Parameters (B)": 7,
|
311 |
+
"Date": "23-12",
|
312 |
+
"Score": 35.98
|
313 |
+
},
|
314 |
+
{
|
315 |
+
"Lang": "m",
|
316 |
+
"Family": "Mistral",
|
317 |
+
"Name": "mistralai/Mistral-7B-Instruct-v0.2",
|
318 |
+
"Parameters (B)": 7,
|
319 |
+
"Date": "23-12",
|
320 |
+
"Score": 40.75
|
321 |
+
},
|
322 |
+
{
|
323 |
+
"Lang": "m",
|
324 |
+
"Family": "Mistral",
|
325 |
+
"Name": "mistralai/Mistral-7B-v0.3",
|
326 |
+
"Parameters (B)": 7,
|
327 |
+
"Date": "24-05",
|
328 |
+
"Score": 37.08
|
329 |
+
},
|
330 |
+
{
|
331 |
+
"Lang": "m",
|
332 |
+
"Family": "Mistral",
|
333 |
+
"Name": "mistralai/Mistral-7B-Instruct-v0.3",
|
334 |
+
"Parameters (B)": 7,
|
335 |
+
"Date": "24-05",
|
336 |
+
"Score": 41.72
|
337 |
+
},
|
338 |
+
{
|
339 |
+
"Lang": "m",
|
340 |
+
"Family": "Mistral",
|
341 |
+
"Name": "mistralai/Mistral-Large-Instruct-2407",
|
342 |
+
"Parameters (B)": 123,
|
343 |
+
"Date": "24-07",
|
344 |
+
"Score": 67.17
|
345 |
+
},
|
346 |
+
{
|
347 |
+
"Lang": "m",
|
348 |
+
"Family": "WizardLM",
|
349 |
+
"Name": "lucyknada/microsoft_WizardLM-2-7B",
|
350 |
+
"Parameters (B)": 7,
|
351 |
+
"Date": "24-04",
|
352 |
+
"Score": 38.23
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"Lang": "m",
|
356 |
+
"Family": "Yi",
|
357 |
+
"Name": "01-ai/Yi-34B-Chat-4bits",
|
358 |
+
"Parameters (B)": 34,
|
359 |
+
"Date": "23-11",
|
360 |
+
"Score": 40.28
|
361 |
+
},
|
362 |
+
{
|
363 |
+
"Lang": "m",
|
364 |
+
"Family": "Yi",
|
365 |
+
"Name": "01-ai/Yi-34B-Chat",
|
366 |
+
"Parameters (B)": 34,
|
367 |
+
"Date": "23-11",
|
368 |
+
"Score": 41.42
|
369 |
+
},
|
370 |
+
{
|
371 |
+
"Lang": "m",
|
372 |
+
"Family": "Yi",
|
373 |
+
"Name": "01-ai/Yi-34B-200K",
|
374 |
+
"Parameters (B)": 34,
|
375 |
+
"Date": "24-03",
|
376 |
+
"Score": 37.56
|
377 |
+
},
|
378 |
+
{
|
379 |
+
"Lang": "m",
|
380 |
+
"Family": "Yi",
|
381 |
+
"Name": "01-ai/Yi-1.5-9B",
|
382 |
+
"Parameters (B)": 9,
|
383 |
+
"Date": "24-05",
|
384 |
+
"Score": 37.06
|
385 |
+
},
|
386 |
+
{
|
387 |
+
"Lang": "m",
|
388 |
+
"Family": "Yi",
|
389 |
+
"Name": "01-ai/Yi-1.5-9B-Chat",
|
390 |
+
"Parameters (B)": 9,
|
391 |
+
"Date": "24-05",
|
392 |
+
"Score": 37.59
|
393 |
+
},
|
394 |
+
{
|
395 |
+
"Lang": "m",
|
396 |
+
"Family": "Yi",
|
397 |
+
"Name": "01-ai/Yi-1.5-34B",
|
398 |
+
"Parameters (B)": 34,
|
399 |
+
"Date": "24-05",
|
400 |
+
"Score": 41.14
|
401 |
+
},
|
402 |
+
{
|
403 |
+
"Lang": "m",
|
404 |
+
"Family": "Yi",
|
405 |
+
"Name": "01-ai/Yi-1.5-34B-Chat",
|
406 |
+
"Parameters (B)": 34,
|
407 |
+
"Date": "24-05",
|
408 |
+
"Score": 41.47
|
409 |
+
}
|
410 |
+
]
|
src/abouts.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
HEADER_TITLE = """
|
2 |
+
<div style="display: flex; flex-wrap: wrap; justify-content: space-around; align-items: center;">
|
3 |
+
<div>
|
4 |
+
<h1 align="center" id="space-title">Adam Mickiewicz University's Center for Artificial Intelligence</h1>
|
5 |
+
<h2 align="center" id="space-subtitle">LLMzSzŁ: a comprehensive LLM benchmark for Polish</h2>
|
6 |
+
</div>
|
7 |
+
</div>
|
8 |
+
"""
|
9 |
+
|
10 |
+
MAIN_DESC = """## Overall scores"""
|
11 |
+
|
12 |
+
GIM_DESC = """### Middle School Exam
|
13 |
+
|
14 |
+
The **Middle School Exam** (*egzamin gimnazjalny*) was formerly a key component of Poland’s secondary education system. This exam was taken by students completing their lower secondary education.
|
15 |
+
|
16 |
+
#### Subjects Covered
|
17 |
+
|
18 |
+
This exam assessed students in core subjects like:
|
19 |
+
|
20 |
+
- **Polish Language**
|
21 |
+
- **Mathematics**
|
22 |
+
- **Science**
|
23 |
+
- **Foreign Language**
|
24 |
+
|
25 |
+
preparing them for higher levels of education.
|
26 |
+
|
27 |
+
#### Recent Changes
|
28 |
+
|
29 |
+
Due to recent educational reforms in Poland:
|
30 |
+
|
31 |
+
- **Middle Schools Phased Out**: Middle schools have been removed from the education system.
|
32 |
+
- **Exam Discontinued**: With this change, the Middle School Exam has also been discontinued.
|
33 |
+
"""
|
34 |
+
|
35 |
+
MAT_DESC = """### High School Exam
|
36 |
+
|
37 |
+
The **High School Exam** (*matura*) is one of the most critical exams in Poland. This exam serves as a prerequisite for higher education and is a significant milestone in students' academic journeys.
|
38 |
+
|
39 |
+
#### Mandatory Subjects
|
40 |
+
|
41 |
+
The *matura* exam includes essential subjects:
|
42 |
+
|
43 |
+
- **Polish Language**
|
44 |
+
- **Mathematics**
|
45 |
+
- **Foreign Language**
|
46 |
+
|
47 |
+
#### Additional Subjects
|
48 |
+
|
49 |
+
Students may also choose additional subjects based on:
|
50 |
+
|
51 |
+
- **Areas of Interest**: Students select subjects aligned with their strengths or future studies.
|
52 |
+
- **University Requirements**: Specific subjects may be necessary for admission to certain university programs.
|
53 |
+
|
54 |
+
#### Minimum points required
|
55 |
+
To pass the High School Exam, students needed to achieve a minimum score of **30%** in mandatory subjects. Additional subjects do not require a minimum score to pass, but the results significantly impact the points needed for university admission.
|
56 |
+
"""
|
57 |
+
|
58 |
+
OSM_DESC = """### 8th-Grade Exam
|
59 |
+
|
60 |
+
The **8th-grade exam** (*egzamin ósmoklasisty*) is a standardized assessment taken by Polish students at the end of their primary education (8th grade).
|
61 |
+
|
62 |
+
#### Subjects Covered
|
63 |
+
|
64 |
+
This exam mainly includes tests in:
|
65 |
+
|
66 |
+
- **Polish Language**
|
67 |
+
- **Mathematics**
|
68 |
+
- **Foreign Language**
|
69 |
+
|
70 |
+
#### Purpose and Role
|
71 |
+
|
72 |
+
- **Secondary Education Foundation**: Provides a foundation for secondary education placements, guiding students to the next stage of their academic journey.
|
73 |
+
- **Student Assessment**: Evaluates core competencies, ensuring readiness for high school-level studies.
|
74 |
+
"""
|
75 |
+
|
76 |
+
ZAW_DESC = """### Professional Exam
|
77 |
+
|
78 |
+
The **Professional Exam** (*egzamin zawodowy*) is tailored for students pursuing vocational education in Poland. This exam is typically taken after completing vocational training programs, which may range from:
|
79 |
+
|
80 |
+
- **Technical High Schools**
|
81 |
+
- **Post-secondary Vocational Courses**
|
82 |
+
|
83 |
+
#### Purpose and Importance
|
84 |
+
|
85 |
+
- **Certification**: A successful score on this exam certifies a student’s qualifications in a specific trade or profession.
|
86 |
+
- **Career Readiness**: Enables students to enter the workforce directly.
|
87 |
+
- **Further Education**: Offers a foundation for pursuing specialized training.
|
88 |
+
"""
|
89 |
+
|
90 |
+
ABOUT = "## ABOUTS"
|
src/envs.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
from huggingface_hub import HfApi
|
4 |
+
|
5 |
+
# Info to change for your repository
# ----------------------------------
# A read/write token for your org; None when HF_TOKEN is not set.
TOKEN = os.getenv("HF_TOKEN")

# Change to your org - don't forget to create a results and request dataset,
# with the correct format!
OWNER = "MCiesiolka"
# ----------------------------------

# Repository identifiers, all namespaced under OWNER.
REPO_ID = OWNER + "/test_leaderboard"
QUEUE_REPO = OWNER + "/requests"
RESULTS_REPO = OWNER + "/results"

# If you setup a cache later, just change HF_HOME
CACHE_PATH = os.environ.get("HF_HOME", ".")

# Local caches, all direct children of CACHE_PATH.
(
    EVAL_REQUESTS_PATH,
    EVAL_RESULTS_PATH,
    EVAL_REQUESTS_PATH_BACKEND,
    EVAL_RESULTS_PATH_BACKEND,
) = (
    os.path.join(CACHE_PATH, folder)
    for folder in ("eval-queue", "eval-results", "eval-queue-bk", "eval-results-bk")
)

# Hub client authenticated with TOKEN.
API = HfApi(token=TOKEN)
|
src/images/logo.png
ADDED
![]() |
src/structures/gim.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from pathlib import Path
|
3 |
+
|
4 |
+
# Define the absolute path to the file
|
5 |
+
abs_path = Path(__file__).parent.parent.parent
|
6 |
+
|
7 |
+
def load_json_data(file_path, exam_name="Egzaminy Gimnazjalne"):
    """Load the scores of one exam type into a display-ready table.

    The JSON file is read with ``pd.read_json`` and transposed, so every
    top-level key becomes one row (exposed in the ``Model`` column) and every
    inner key becomes a column. Only columns whose label contains
    *exam_name* are kept.

    Args:
        file_path: Path to the JSON file with the scores.
        exam_name: Substring a column label must contain to be kept.
            Defaults to the Middle School exam label.

    Returns:
        A DataFrame whose first column is ``Model`` (a Markdown link to
        ``https://huggingface.co/<name>`` with ``__`` replaced by ``/``),
        followed by the score columns multiplied by 100, rounded to two
        decimals, and ordered by their shortened label sorted as strings.
    """
    table = pd.read_json(file_path).T.reset_index()
    table = table.rename(columns={"index": "Model"})

    # Non-string labels cannot contain the exam name, so they are skipped
    # instead of raising on the substring test.
    exam_columns = [
        col
        for col in table.columns
        if isinstance(col, str) and col != "Model" and exam_name in col
    ]

    # Work on an explicit copy: assigning into a column-sliced frame is the
    # pattern pandas warns about (chained assignment).
    table = table[["Model"] + exam_columns].copy()

    def as_markdown_link(name):
        repo_id = name.replace("__", "/")
        return f"[{repo_id}](https://huggingface.co/{repo_id})"

    table["Model"] = table["Model"].apply(as_markdown_link)

    if exam_columns:
        # Non-numeric cells become NaN; the values are scaled by 100 before
        # rounding (presumably fractions turned into percentages).
        scaled = table[exam_columns].apply(pd.to_numeric, errors="coerce") * 100
        table[exam_columns] = scaled.round(2)

    # Keep the text before the first comma minus its first character.
    # NOTE(review): labels presumably look like "(2015, 'Egzaminy ...')",
    # which leaves just the year - confirm against the JSON file.
    table.columns = ["Model"] + [col.split(",")[0][1:] for col in exam_columns]

    sorted_labels = sorted(table.columns[1:].astype(str).tolist())
    return table[["Model"] + sorted_labels]
|
36 |
+
|
37 |
+
# Load the Middle School exam table once at import time.
file_path = str(abs_path / "leaderboards/all_types_years.json")
GIM_SCORES = load_json_data(file_path)

# Wrap the table in a Styler that highlights the per-column maximum.
# The subset is every column after "Model" (load_json_data returns "Model"
# first), so it no longer relies on a hard-coded column count.
GIM_SCORES = GIM_SCORES.style.highlight_max(
    color='#ff7070',
    subset=GIM_SCORES.columns[1:]).format(precision=2)
|
src/structures/leaderboard_structure.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from pathlib import Path
|
3 |
+
|
4 |
+
# Directory three levels above this file (src/structures/<file> -> repository root).
abs_path = Path(__file__).parent.parent.parent

# Column keys of the leaderboard frame, in display order.
ORDER_LIST = ["Name", "Lang", "Score", "Parameters (B)", "Date"]
# Display headers, one per ORDER_LIST entry ("Lang" is labelled "Language").
COLUMN_HEADERS = ["Name", "Language", "Score", "Parameters (B)", "Date"]
# Datatype strings, one per ORDER_LIST entry.
DATA_TYPES = ["markdown", "str", "number", "number", "str"]
|
9 |
+
|
10 |
+
def filter_data(selected_columns, search_query):
    """Return the selected leaderboard columns, optionally filtered by name.

    Args:
        selected_columns: Column labels of ``LB_LLMZSZL`` to keep.
        search_query: Text to look for in the ``Name`` column. Matching is
            case-insensitive and literal (not a regular expression). A falsy
            query disables filtering.

    Returns:
        A DataFrame with ``selected_columns`` and only the matching rows.
    """
    df = LB_LLMZSZL[selected_columns]
    if search_query:
        # Build the mask from the full frame so filtering also works when
        # "Name" is not among the selected columns. regex=False keeps input
        # such as "(" or "+" from being parsed as a pattern and raising.
        matches = LB_LLMZSZL["Name"].str.contains(
            search_query, case=False, na=False, regex=False
        )
        df = df[matches]
    return df
|
15 |
+
|
16 |
+
def filter_row(language):
    """Return the leaderboard rows whose ``Lang`` value equals *language*.

    A falsy *language* (``None`` or ``""``) disables filtering: the
    module-level leaderboard frame is returned as is.
    """
    if not language:
        return LB_LLMZSZL
    same_language = LB_LLMZSZL["Lang"] == language
    return LB_LLMZSZL[same_language]
|
20 |
+
|
21 |
+
def filter_columns(column_choices):
    """Return the leaderboard restricted to the chosen columns.

    The columns always appear in ``ORDER_LIST`` order, whatever the order of
    *column_choices*; choices that are not in ``ORDER_LIST`` are ignored.
    """
    kept = list(filter(lambda name: name in column_choices, ORDER_LIST))
    return LB_LLMZSZL[kept]
|
24 |
+
|
25 |
+
def load_json_data(file_path, order_list):
    """Load the leaderboard JSON into a display-ready, score-sorted frame.

    Args:
        file_path: Path to the leaderboard JSON file.
        order_list: Column labels in the desired order; labels missing from
            the data are ignored and columns not listed are dropped.

    Returns:
        A DataFrame with the ``order_list`` columns, ``Name`` rendered as a
        Markdown link to ``https://huggingface.co/<name>``, ``Lang`` codes
        expanded to language names, sorted by ``Score`` in descending order.

    Raises:
        KeyError: If the data has no ``Name``, ``Lang`` or ``Score`` column,
            or ``Score`` is not part of *order_list*.
    """
    # convert_dates=False: pandas' default date detection targets columns
    # named like "Date"; the values (e.g. "23-07") must stay plain strings.
    board = pd.read_json(file_path, convert_dates=False)

    # Stringify every column that holds at least one dict value.
    for column in board.columns:
        if board[column].map(lambda value: isinstance(value, dict)).any():
            board[column] = board[column].apply(str)

    board["Name"] = board["Name"].apply(
        lambda name: f"[{name}](https://huggingface.co/{name})"
    )

    lang_replacements = {
        'E': 'English',
        'P': 'Polish',
        'm': 'Multilingual'
    }
    # Unknown codes are kept unchanged.
    board["Lang"] = board["Lang"].apply(
        lambda lang_code: lang_replacements.get(lang_code, lang_code)
    )

    ordered_columns = [col for col in order_list if col in board.columns]
    return board[ordered_columns].sort_values(by="Score", ascending=False)
|
50 |
+
|
51 |
+
|
52 |
+
# Build the leaderboard frame once at import time; the filter_* helpers in
# this module read it as a module-level global.
file_path = str(abs_path / "leaderboards/llmzszl.json")
LB_LLMZSZL = load_json_data(file_path, ORDER_LIST)
|
src/structures/mat.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from pathlib import Path
|
3 |
+
|
4 |
+
# Define the absolute path to the file
|
5 |
+
abs_path = Path(__file__).parent.parent.parent
|
6 |
+
|
7 |
+
def load_json_data(file_path, exam_name="Egzaminy Maturalne"):
    """Load the scores of one exam type into a display-ready table.

    The JSON file is read with ``pd.read_json`` and transposed, so every
    top-level key becomes one row (exposed in the ``Model`` column) and every
    inner key becomes a column. Only columns whose label contains
    *exam_name* are kept.

    Args:
        file_path: Path to the JSON file with the scores.
        exam_name: Substring a column label must contain to be kept.
            Defaults to the High School exam label.

    Returns:
        A DataFrame whose first column is ``Model`` (a Markdown link to
        ``https://huggingface.co/<name>`` with ``__`` replaced by ``/``),
        followed by the score columns multiplied by 100, rounded to two
        decimals, and ordered by their shortened label sorted as strings.
    """
    table = pd.read_json(file_path).T.reset_index()
    table = table.rename(columns={"index": "Model"})

    # Non-string labels cannot contain the exam name, so they are skipped
    # instead of raising on the substring test.
    exam_columns = [
        col
        for col in table.columns
        if isinstance(col, str) and col != "Model" and exam_name in col
    ]

    # Work on an explicit copy: assigning into a column-sliced frame is the
    # pattern pandas warns about (chained assignment).
    table = table[["Model"] + exam_columns].copy()

    def as_markdown_link(name):
        repo_id = name.replace("__", "/")
        return f"[{repo_id}](https://huggingface.co/{repo_id})"

    table["Model"] = table["Model"].apply(as_markdown_link)

    if exam_columns:
        # Non-numeric cells become NaN; the values are scaled by 100 before
        # rounding (presumably fractions turned into percentages).
        scaled = table[exam_columns].apply(pd.to_numeric, errors="coerce") * 100
        table[exam_columns] = scaled.round(2)

    # Keep the text before the first comma minus its first character.
    # NOTE(review): labels presumably look like "(2015, 'Egzaminy ...')",
    # which leaves just the year - confirm against the JSON file.
    table.columns = ["Model"] + [col.split(",")[0][1:] for col in exam_columns]

    sorted_labels = sorted(table.columns[1:].astype(str).tolist())
    return table[["Model"] + sorted_labels]
|
36 |
+
|
37 |
+
# Load the High School exam table once at import time.
file_path = str(abs_path / "leaderboards/all_types_years.json")
MAT_SCORES = load_json_data(file_path)

# Wrap the table in a Styler that highlights the per-column maximum.
# The subset is every column after "Model" (load_json_data returns "Model"
# first), so it no longer relies on a hard-coded column count.
MAT_SCORES = MAT_SCORES.style.highlight_max(
    color='#ff7070',
    subset=MAT_SCORES.columns[1:]).format(precision=2)
|
src/structures/osm.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from pathlib import Path
|
3 |
+
|
4 |
+
# Define the absolute path to the file
|
5 |
+
abs_path = Path(__file__).parent.parent.parent
|
6 |
+
|
7 |
+
def load_json_data(file_path, exam_name="Egzaminy Ósmoklasisty"):
    """Load the scores of one exam type into a display-ready table.

    The JSON file is read with ``pd.read_json`` and transposed, so every
    top-level key becomes one row (exposed in the ``Model`` column) and every
    inner key becomes a column. Only columns whose label contains
    *exam_name* are kept.

    Args:
        file_path: Path to the JSON file with the scores.
        exam_name: Substring a column label must contain to be kept.
            Defaults to the 8th-grade exam label.

    Returns:
        A DataFrame whose first column is ``Model`` (a Markdown link to
        ``https://huggingface.co/<name>`` with ``__`` replaced by ``/``),
        followed by the score columns multiplied by 100, rounded to two
        decimals, and ordered by their shortened label sorted as strings.
    """
    table = pd.read_json(file_path).T.reset_index()
    table = table.rename(columns={"index": "Model"})

    # Non-string labels cannot contain the exam name, so they are skipped
    # instead of raising on the substring test.
    exam_columns = [
        col
        for col in table.columns
        if isinstance(col, str) and col != "Model" and exam_name in col
    ]

    # Work on an explicit copy: assigning into a column-sliced frame is the
    # pattern pandas warns about (chained assignment).
    table = table[["Model"] + exam_columns].copy()

    def as_markdown_link(name):
        repo_id = name.replace("__", "/")
        return f"[{repo_id}](https://huggingface.co/{repo_id})"

    table["Model"] = table["Model"].apply(as_markdown_link)

    if exam_columns:
        # Non-numeric cells become NaN; the values are scaled by 100 before
        # rounding (presumably fractions turned into percentages).
        scaled = table[exam_columns].apply(pd.to_numeric, errors="coerce") * 100
        table[exam_columns] = scaled.round(2)

    # Keep the text before the first comma minus its first character.
    # NOTE(review): labels presumably look like "(2015, 'Egzaminy ...')",
    # which leaves just the year - confirm against the JSON file.
    table.columns = ["Model"] + [col.split(",")[0][1:] for col in exam_columns]

    sorted_labels = sorted(table.columns[1:].astype(str).tolist())
    return table[["Model"] + sorted_labels]
|
36 |
+
|
37 |
+
# Load the 8th-grade exam table once at import time.
file_path = str(abs_path / "leaderboards/all_types_years.json")
OSM_SCORES = load_json_data(file_path)

# Wrap the table in a Styler that highlights the per-column maximum.
# The subset is every column after "Model" (load_json_data returns "Model"
# first), so it no longer relies on a hard-coded column count.
OSM_SCORES = OSM_SCORES.style.highlight_max(
    color='#ff7070',
    subset=OSM_SCORES.columns[1:]).format(precision=2)
|
src/structures/zaw.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from pathlib import Path
|
3 |
+
|
4 |
+
# Define the absolute path to the file
|
5 |
+
abs_path = Path(__file__).parent.parent.parent
|
6 |
+
|
7 |
+
def load_json_data(file_path, exam_type: str = "Egzaminy Zawodowe") -> pd.DataFrame:
    """Build the per-year score table for a single exam type.

    The JSON file is read with ``pd.read_json`` and transposed so that every
    model becomes one row. Only the columns whose label contains
    ``exam_type`` are kept; their values are converted to numbers, scaled by
    100 and rounded to two decimals. Column labels are then shortened to the
    text between the leading character and the first comma (the year part of
    labels shaped like ``"(2019, '...')"``) and ordered ascending.

    Args:
        file_path: Path (or file-like object) accepted by ``pd.read_json``.
        exam_type: Substring a column label must contain to be kept.
            Defaults to the vocational exams, which is what this module has
            always loaded.

    Returns:
        A DataFrame whose first column, ``Model``, holds Markdown links to
        the model pages on huggingface.co, followed by one column per year.
    """
    raw = pd.read_json(file_path)

    # Transpose so models are rows, then turn the model names into a column.
    raw = raw.T.reset_index().rename(columns={'index': 'Model'})

    # Non-string labels cannot carry an exam name, so skip them instead of
    # letting the substring test raise TypeError.
    exam_columns = [
        col for col in raw.columns
        if isinstance(col, str) and col != 'Model' and exam_type in col
    ]

    # Work on an explicit copy: assigning into a column slice of `raw` is
    # what triggers pandas' SettingWithCopyWarning.
    scores = raw[['Model'] + exam_columns].copy()

    def _as_markdown_link(name):
        # File-system friendly "org__model" ids map to "org/model" repo ids.
        repo_id = name.replace('__', '/')
        return f"[{repo_id}](https://huggingface.co/{repo_id})"

    scores["Model"] = scores["Model"].apply(_as_markdown_link)

    if exam_columns:
        # Values that cannot be parsed become NaN (errors='coerce').
        numeric = scores[exam_columns].apply(pd.to_numeric, errors='coerce') * 100
        scores[exam_columns] = numeric.round(2)

    # Keep only the year part of each label, as a string.
    years = [col.split(',')[0][1:] for col in exam_columns]
    scores.columns = ['Model'] + years

    # Years are sorted as strings.
    return scores[['Model'] + sorted(years)]
|
36 |
+
|
37 |
+
# Location of the combined results file, relative to the repository root.
file_path = str(abs_path / "leaderboards/all_types_years.json")

# Plain DataFrame first; it is replaced below by its styled version, which is
# the object other modules import under this name.
ZAW_SCORES = load_json_data(file_path)

# Highlight the best score in every year column. The subset is derived from
# the frame (everything after 'Model') instead of a fixed column count, so it
# stays correct when years are added to or removed from the data.
ZAW_SCORES = ZAW_SCORES.style.highlight_max(
    color='#ff7070',
    subset=ZAW_SCORES.columns[1:],
).format(precision=2)
|
src/styles.py
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
custom_css = """
|
2 |
+
.markdown-text {
|
3 |
+
font-size: 16px !important;
|
4 |
+
}
|
5 |
+
#models-to-add-text {
|
6 |
+
font-size: 18px !important;
|
7 |
+
}
|
8 |
+
#citation-button span {
|
9 |
+
font-size: 16px !important;
|
10 |
+
}
|
11 |
+
#citation-button textarea {
|
12 |
+
font-size: 16px !important;
|
13 |
+
}
|
14 |
+
#citation-button > label > button {
|
15 |
+
margin: 6px;
|
16 |
+
transform: scale(1.3);
|
17 |
+
}
|
18 |
+
#leaderboard-table {
|
19 |
+
margin-top: 15px
|
20 |
+
}
|
21 |
+
#leaderboard-table-lite {
|
22 |
+
margin-top: 15px
|
23 |
+
}
|
24 |
+
#search-bar-table-box > div:first-child {
|
25 |
+
background: none;
|
26 |
+
border: none;
|
27 |
+
}
|
28 |
+
|
29 |
+
#search-bar {
|
30 |
+
padding: 0px;
|
31 |
+
}
|
32 |
+
/* Hides the final AutoEvalColumn */
|
33 |
+
#llm-benchmark-tab-table table td:last-child,
|
34 |
+
#llm-benchmark-tab-table table th:last-child {
|
35 |
+
display: none;
|
36 |
+
}
|
37 |
+
/* Limit the width of the first AutoEvalColumn so that names don't expand too much */
|
38 |
+
table td:first-child,
|
39 |
+
table th:first-child {
|
40 |
+
max-width: 400px;
|
41 |
+
overflow: auto;
|
42 |
+
white-space: nowrap;
|
43 |
+
}
|
44 |
+
table > tbody > tr > td:nth-child(3) > div {
|
45 |
+
overflow-x: auto;
|
46 |
+
width: 450px;
|
47 |
+
}
|
48 |
+
tbody span {
|
49 |
+
-webkit-user-select: text;
|
50 |
+
-moz-user-select: text;
|
51 |
+
-ms-user-select: text;
|
52 |
+
user-select: text;
|
53 |
+
}
|
54 |
+
.tab-buttons button {
|
55 |
+
font-size: 20px;
|
56 |
+
}
|
57 |
+
#scale-logo {
|
58 |
+
border-style: none !important;
|
59 |
+
box-shadow: none;
|
60 |
+
display: block;
|
61 |
+
margin-left: auto;
|
62 |
+
margin-right: auto;
|
63 |
+
max-width: 600px;
|
64 |
+
}
|
65 |
+
#scale-logo .download {
|
66 |
+
display: none;
|
67 |
+
}
|
68 |
+
#filter_type{
|
69 |
+
border: 0;
|
70 |
+
padding-left: 0;
|
71 |
+
padding-top: 0;
|
72 |
+
}
|
73 |
+
#filter_type label {
|
74 |
+
display: flex;
|
75 |
+
}
|
76 |
+
#filter_type label > span{
|
77 |
+
margin-top: var(--spacing-lg);
|
78 |
+
margin-right: 0.5em;
|
79 |
+
}
|
80 |
+
#filter_type label > .wrap{
|
81 |
+
width: 103px;
|
82 |
+
}
|
83 |
+
#filter_type label > .wrap .wrap-inner{
|
84 |
+
padding: 2px;
|
85 |
+
}
|
86 |
+
#filter_type label > .wrap .wrap-inner input{
|
87 |
+
width: 1px
|
88 |
+
}
|
89 |
+
#filter-columns-type{
|
90 |
+
border:0;
|
91 |
+
padding:0.5;
|
92 |
+
}
|
93 |
+
#filter-columns-size{
|
94 |
+
border:0;
|
95 |
+
padding:0.5;
|
96 |
+
}
|
97 |
+
#box-filter > .form{
|
98 |
+
border: 0
|
99 |
+
}
|
100 |
+
"""
|
101 |
+
|
102 |
+
get_window_url_params = """
|
103 |
+
function(url_params) {
|
104 |
+
const params = new URLSearchParams(window.location.search);
|
105 |
+
url_params = Object.fromEntries(params);
|
106 |
+
return url_params;
|
107 |
+
}
|
108 |
+
"""
|
src/utils.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|