qyhfrank committed on
Commit
bb8527c
·
1 Parent(s): da489da

Init Commit

Browse files
Files changed (8) hide show
  1. .gitignore +254 -0
  2. app.py +59 -0
  3. css_html_js.py +111 -0
  4. leaderboard.py +111 -0
  5. leaderboard_table_20240520.csv +18 -0
  6. requirements.txt +1 -0
  7. theme.json +1 -0
  8. utils.py +116 -0
.gitignore ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,macos,windows
2
+ # Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode,macos,windows
3
+
4
+ ### macOS ###
5
+ # General
6
+ .DS_Store
7
+ .AppleDouble
8
+ .LSOverride
9
+
10
+ # Icon must end with two \r
11
+ Icon
12
+
13
+
14
+ # Thumbnails
15
+ ._*
16
+
17
+ # Files that might appear in the root of a volume
18
+ .DocumentRevisions-V100
19
+ .fseventsd
20
+ .Spotlight-V100
21
+ .TemporaryItems
22
+ .Trashes
23
+ .VolumeIcon.icns
24
+ .com.apple.timemachine.donotpresent
25
+
26
+ # Directories potentially created on remote AFP share
27
+ .AppleDB
28
+ .AppleDesktop
29
+ Network Trash Folder
30
+ Temporary Items
31
+ .apdisk
32
+
33
+ ### macOS Patch ###
34
+ # iCloud generated files
35
+ *.icloud
36
+
37
+ ### Python ###
38
+ # Byte-compiled / optimized / DLL files
39
+ __pycache__/
40
+ *.py[cod]
41
+ *$py.class
42
+
43
+ # C extensions
44
+ *.so
45
+
46
+ # Distribution / packaging
47
+ .Python
48
+ build/
49
+ develop-eggs/
50
+ dist/
51
+ downloads/
52
+ eggs/
53
+ .eggs/
54
+ lib/
55
+ lib64/
56
+ parts/
57
+ sdist/
58
+ var/
59
+ wheels/
60
+ share/python-wheels/
61
+ *.egg-info/
62
+ .installed.cfg
63
+ *.egg
64
+ MANIFEST
65
+
66
+ # PyInstaller
67
+ # Usually these files are written by a python script from a template
68
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
69
+ *.manifest
70
+ *.spec
71
+
72
+ # Installer logs
73
+ pip-log.txt
74
+ pip-delete-this-directory.txt
75
+
76
+ # Unit test / coverage reports
77
+ htmlcov/
78
+ .tox/
79
+ .nox/
80
+ .coverage
81
+ .coverage.*
82
+ .cache
83
+ nosetests.xml
84
+ coverage.xml
85
+ *.cover
86
+ *.py,cover
87
+ .hypothesis/
88
+ .pytest_cache/
89
+ cover/
90
+
91
+ # Translations
92
+ *.mo
93
+ *.pot
94
+
95
+ # Django stuff:
96
+ *.log
97
+ local_settings.py
98
+ db.sqlite3
99
+ db.sqlite3-journal
100
+
101
+ # Flask stuff:
102
+ instance/
103
+ .webassets-cache
104
+
105
+ # Scrapy stuff:
106
+ .scrapy
107
+
108
+ # Sphinx documentation
109
+ docs/_build/
110
+
111
+ # PyBuilder
112
+ .pybuilder/
113
+ target/
114
+
115
+ # Jupyter Notebook
116
+ .ipynb_checkpoints
117
+
118
+ # IPython
119
+ profile_default/
120
+ ipython_config.py
121
+
122
+ # pyenv
123
+ # For a library or package, you might want to ignore these files since the code is
124
+ # intended to run in multiple environments; otherwise, check them in:
125
+ # .python-version
126
+
127
+ # pipenv
128
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
129
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
130
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
131
+ # install all needed dependencies.
132
+ #Pipfile.lock
133
+
134
+ # poetry
135
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
136
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
137
+ # commonly ignored for libraries.
138
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
139
+ #poetry.lock
140
+
141
+ # pdm
142
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
143
+ #pdm.lock
144
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
145
+ # in version control.
146
+ # https://pdm.fming.dev/#use-with-ide
147
+ .pdm.toml
148
+
149
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
150
+ __pypackages__/
151
+
152
+ # Celery stuff
153
+ celerybeat-schedule
154
+ celerybeat.pid
155
+
156
+ # SageMath parsed files
157
+ *.sage.py
158
+
159
+ # Environments
160
+ .env
161
+ .venv
162
+ env/
163
+ venv/
164
+ ENV/
165
+ env.bak/
166
+ venv.bak/
167
+
168
+ # Spyder project settings
169
+ .spyderproject
170
+ .spyproject
171
+
172
+ # Rope project settings
173
+ .ropeproject
174
+
175
+ # mkdocs documentation
176
+ /site
177
+
178
+ # mypy
179
+ .mypy_cache/
180
+ .dmypy.json
181
+ dmypy.json
182
+
183
+ # Pyre type checker
184
+ .pyre/
185
+
186
+ # pytype static type analyzer
187
+ .pytype/
188
+
189
+ # Cython debug symbols
190
+ cython_debug/
191
+
192
+ # PyCharm
193
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
194
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
195
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
196
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
197
+ #.idea/
198
+
199
+ ### Python Patch ###
200
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
201
+ poetry.toml
202
+
203
+ # ruff
204
+ .ruff_cache/
205
+
206
+ # LSP config files
207
+ pyrightconfig.json
208
+
209
+ ### VisualStudioCode ###
210
+ .vscode/*
211
+ !.vscode/settings.json
212
+ !.vscode/tasks.json
213
+ !.vscode/launch.json
214
+ !.vscode/extensions.json
215
+ !.vscode/*.code-snippets
216
+
217
+ # Local History for Visual Studio Code
218
+ .history/
219
+
220
+ # Built Visual Studio Code Extensions
221
+ *.vsix
222
+
223
+ ### VisualStudioCode Patch ###
224
+ # Ignore all local history of files
225
+ .history
226
+ .ionide
227
+
228
+ ### Windows ###
229
+ # Windows thumbnail cache files
230
+ Thumbs.db
231
+ Thumbs.db:encryptable
232
+ ehthumbs.db
233
+ ehthumbs_vista.db
234
+
235
+ # Dump file
236
+ *.stackdump
237
+
238
+ # Folder config file
239
+ [Dd]esktop.ini
240
+
241
+ # Recycle Bin used on file shares
242
+ $RECYCLE.BIN/
243
+
244
+ # Windows Installer files
245
+ *.cab
246
+ *.msi
247
+ *.msix
248
+ *.msm
249
+ *.msp
250
+
251
+ # Windows shortcuts
252
+ *.lnk
253
+
254
+ # End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,macos,windows
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Original code by https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard
2
+ # Modified by EffiBench
3
+
4
+ from leaderboard import build_leaderboard_tab
5
+ from utils import build_logger
6
+ from css_html_js import custom_css
7
+
8
+ import argparse
9
+ import glob
10
+ import gradio as gr
11
+
12
+
13
def build_demo(leaderboard_table_file):
    """Assemble the Gradio Blocks app that hosts the leaderboard.

    Loads the base theme from ``theme.json``, switches it to the large text
    size, layers the button/checkbox overrides on top, and then renders the
    leaderboard UI for ``leaderboard_table_file`` inside a ``gr.Blocks``
    context.

    Returns the constructed ``gr.Blocks`` object; launching it is left to the
    caller.
    """
    # Base theme comes from the JSON file shipped next to the app; only the
    # text size is replaced wholesale.
    theme = gr.themes.Default.load("theme.json")
    theme.text_size = gr.themes.sizes.text_lg

    # Individual theme variables overridden on top of the loaded theme.
    theme_overrides = {
        "button_large_text_size": "40px",
        "button_small_text_size": "40px",
        "button_large_text_weight": "1000",
        "button_small_text_weight": "1000",
        "button_shadow": "*shadow_drop_lg",
        "button_shadow_hover": "*shadow_drop_lg",
        "checkbox_label_shadow": "*shadow_drop_lg",
        "button_shadow_active": "*shadow_inset",
        "button_secondary_background_fill": "*primary_300",
        "button_secondary_background_fill_dark": "*primary_700",
        "button_secondary_background_fill_hover": "*primary_200",
        "button_secondary_background_fill_hover_dark": "*primary_500",
        "button_secondary_text_color": "*primary_800",
        "button_secondary_text_color_dark": "white",
    }
    theme.set(**theme_overrides)

    with gr.Blocks(title="EffiBench Leaderboard", theme=theme, css=custom_css) as demo:
        build_leaderboard_tab(leaderboard_table_file)

    return demo
43
+
44
+
45
# Script body: parse CLI options, pick the newest leaderboard table, launch.
parser = argparse.ArgumentParser()
parser.add_argument("--share", action="store_true")
parser.add_argument("--host", default="0.0.0.0")
parser.add_argument("--port", type=int, default=7860)
args = parser.parse_args()

logger = build_logger("monitor", "monitor.log")
logger.info(f"args: {args}")

# Table files are named ``leaderboard_table_<digits>.csv`` (for example
# ``leaderboard_table_20240520.csv``).  The numeric stamp is extracted using
# the prefix/suffix lengths rather than hard-coded slice offsets, so the two
# cannot drift apart if the naming ever changes.
_TABLE_PREFIX = "leaderboard_table_"
_TABLE_SUFFIX = ".csv"


def _table_stamp(filename):
    """Return the text between the table prefix and the ``.csv`` suffix."""
    return filename[len(_TABLE_PREFIX):-len(_TABLE_SUFFIX)]


# Files whose stamp is not purely numeric are ignored, so one stray
# ``leaderboard_table_backup.csv`` cannot abort start-up with a ValueError.
leaderboard_table_files = [
    name
    for name in glob.glob(f"{_TABLE_PREFIX}*{_TABLE_SUFFIX}")
    if _table_stamp(name).isdecimal()
]
if not leaderboard_table_files:
    # Fail with an actionable message instead of a bare IndexError below.
    raise FileNotFoundError(
        f"No '{_TABLE_PREFIX}<digits>{_TABLE_SUFFIX}' file found in the "
        "current working directory."
    )

# The largest numeric stamp wins.
leaderboard_table_files.sort(key=lambda name: int(_table_stamp(name)))
leaderboard_table_file = leaderboard_table_files[-1]

demo = build_demo(leaderboard_table_file)
demo.launch(share=args.share, server_name=args.host, server_port=args.port)
css_html_js.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Original code by https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard
2
+ # Modified by EffiBench
3
+
4
+ custom_css = """
5
+ #notice_markdown .prose {
6
+ font-size: 110% !important;
7
+ }
8
+ #notice_markdown th {
9
+ display: none;
10
+ }
11
+ #notice_markdown td {
12
+ padding-top: 6px;
13
+ padding-bottom: 6px;
14
+ }
15
+ #arena_leaderboard_dataframe table {
16
+ font-size: 110%;
17
+ }
18
+ #full_leaderboard_dataframe table {
19
+ font-size: 110%;
20
+ }
21
+ #model_description_markdown {
22
+ font-size: 110% !important;
23
+ }
24
+ #leaderboard_markdown .prose {
25
+ font-size: 110% !important;
26
+ }
27
+ #leaderboard_markdown td {
28
+ padding-top: 6px;
29
+ padding-bottom: 6px;
30
+ }
31
+ #leaderboard_dataframe td {
32
+ line-height: 0.1em;
33
+ }
34
+ #about_markdown .prose {
35
+ font-size: 110% !important;
36
+ }
37
+ #ack_markdown .prose {
38
+ font-size: 110% !important;
39
+ }
40
+ #chatbot .prose {
41
+ font-size: 105% !important;
42
+ }
43
+ .sponsor-image-about img {
44
+ margin: 0 20px;
45
+ margin-top: 20px;
46
+ height: 40px;
47
+ max-height: 100%;
48
+ width: auto;
49
+ float: left;
50
+ }
51
+
52
+ .chatbot h1, h2, h3 {
53
+ margin-top: 8px; /* Adjust the value as needed */
54
+ margin-bottom: 0px; /* Adjust the value as needed */
55
+ padding-bottom: 0px;
56
+ }
57
+
58
+ .chatbot h1 {
59
+ font-size: 130%;
60
+ }
61
+ .chatbot h2 {
62
+ font-size: 120%;
63
+ }
64
+ .chatbot h3 {
65
+ font-size: 110%;
66
+ }
67
+ .chatbot p:not(:first-child) {
68
+ margin-top: 8px;
69
+ }
70
+
71
+ .typing {
72
+ display: inline-block;
73
+ }
74
+
75
+ .cursor {
76
+ display: inline-block;
77
+ width: 7px;
78
+ height: 1em;
79
+ background-color: black;
80
+ vertical-align: middle;
81
+ animation: blink 1s infinite;
82
+ }
83
+
84
+ .dark .cursor {
85
+ display: inline-block;
86
+ width: 7px;
87
+ height: 1em;
88
+ background-color: white;
89
+ vertical-align: middle;
90
+ animation: blink 1s infinite;
91
+ }
92
+
93
+ @keyframes blink {
94
+ 0%, 50% { opacity: 1; }
95
+ 50.1%, 100% { opacity: 0; }
96
+ }
97
+
98
+ .app {
99
+ max-width: 100% !important;
100
+ padding: 20px !important;
101
+ }
102
+
103
+ a {
104
+ color: #1976D2; /* Your current link color, a shade of blue */
105
+ text-decoration: none; /* Removes underline from links */
106
+ }
107
+ a:hover {
108
+ color: #63A4FF; /* This can be any color you choose for hover */
109
+ text-decoration: underline; /* Adds underline on hover */
110
+ }
111
+ """
leaderboard.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Original code by https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard
2
+ # Modified by EffiBench
3
+
4
+ import pandas as pd
5
+ import gradio as gr
6
+
7
+
8
def make_default_md_1():
    """Return the header Markdown: the page title followed by a row of links.

    The links are emitted as inline-styled HTML anchors separated by ``|``,
    one anchor per line.
    """
    # This color should be clear in both light and dark mode.
    link_color = "#1976D2"

    # (label, target URL) for each anchor, in display order.
    links = (
        ("Blog", "https://lmsys.org/blog/2023-05-03-arena/"),
        ("Paper", "https://arxiv.org/abs/2403.04132"),
        ("GitHub", "https://github.com/lm-sys/FastChat"),
        ("Dataset", "https://github.com/lm-sys/FastChat/blob/main/docs/dataset_release.md"),
        ("Twitter", "https://twitter.com/lmsysorg"),
        ("Discord", "https://discord.gg/HSWAKCrnFx"),
    )
    anchors = [
        f"<a href='{url}' style='color: {link_color}; text-decoration: none;'>{label}</a>"
        for label, url in links
    ]

    title_line = "# 🏆 LMSYS Chatbot Arena Leaderboard"
    return "\n" + title_line + "\n" + " |\n".join(anchors) + "\n"
21
+
22
def make_default_md_2():
    """Return the introductory Markdown paragraph shown below the header."""
    # Plain (non-f) string: the text contains no placeholders to interpolate.
    leaderboard_md = """
LMSYS Chatbot Arena is a crowdsourced open platform for LLM evals. We've collected over 800,000 human pairwise comparisons to rank LLMs with the Bradley-Terry model and display the model ratings in Elo-scale.
You can find more details in our paper. **Chatbot arena is dependent on community participation, please contribute by casting your vote!**
"""

    return leaderboard_md
29
+
30
+ leaderboard_md = """
31
+ Three benchmarks are displayed: **Arena Elo**, **MT-Bench** and **MMLU**.
32
+ - [Chatbot Arena](https://chat.lmsys.org/?arena) - a crowdsourced, randomized battle platform. We use 500K+ user votes to compute model strength.
33
+ - [MT-Bench](https://arxiv.org/abs/2306.05685): a set of challenging multi-turn questions. We use GPT-4 to grade the model responses.
34
+ - [MMLU](https://arxiv.org/abs/2009.03300) (5-shot): a test to measure a model's multitask accuracy on 57 tasks.
35
+
36
+ 💻 Code: The MT-bench scores (single-answer grading on a scale of 10) are computed by [fastchat.llm_judge](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge).
37
+ The MMLU scores are mostly computed by [InstructEval](https://github.com/declare-lab/instruct-eval).
38
+ Higher values are better for all benchmarks. Empty cells mean not available.
39
+ """
40
+
41
+ acknowledgment_md = """
42
+ ### Terms of Service
43
+
44
+ Users are required to agree to the following terms before using the service:
45
+
46
+ The service is a research preview. It only provides limited safety measures and may generate offensive content.
47
+ It must not be used for any illegal, harmful, violent, racist, or sexual purposes.
48
+ Please do not upload any private information.
49
+ The service collects user dialogue data, including both text and images, and reserves the right to distribute it under a Creative Commons Attribution (CC-BY) or a similar license.
50
+
51
+ ### Acknowledgment
52
+ We thank [UC Berkeley SkyLab](https://sky.cs.berkeley.edu/), [Kaggle](https://www.kaggle.com/), [MBZUAI](https://mbzuai.ac.ae/), [a16z](https://www.a16z.com/), [Together AI](https://www.together.ai/), [Hyperbolic](https://hyperbolic.xyz/), [Anyscale](https://www.anyscale.com/), [HuggingFace](https://huggingface.co/) for their generous [sponsorship](https://lmsys.org/donations/).
53
+
54
+ <div class="sponsor-image-about">
55
+ <img src="https://storage.googleapis.com/public-arena-asset/skylab.png" alt="SkyLab">
56
+ <img src="https://storage.googleapis.com/public-arena-asset/kaggle.png" alt="Kaggle">
57
+ <img src="https://storage.googleapis.com/public-arena-asset/mbzuai.jpeg" alt="MBZUAI">
58
+ <img src="https://storage.googleapis.com/public-arena-asset/a16z.jpeg" alt="a16z">
59
+ <img src="https://storage.googleapis.com/public-arena-asset/together.png" alt="Together AI">
60
+ <img src="https://storage.googleapis.com/public-arena-asset/hyperbolic_logo.png" alt="Hyperbolic">
61
+ <img src="https://storage.googleapis.com/public-arena-asset/anyscale.png" alt="AnyScale">
62
+ <img src="https://storage.googleapis.com/public-arena-asset/huggingface.png" alt="HuggingFace">
63
+ </div>
64
+ """
65
+
66
# Markdown for the citation section.  The fenced block holding the BibTeX
# entry is closed explicitly so that any Markdown appended after it is not
# swallowed into the code block.
citation_md = """
### Citation
Please cite the following paper if you find our leaderboard or dataset helpful.
```
@misc{chiang2024chatbot,
    title={Chatbot Arena: An Open Platform for Evaluating LLMs by Human Preference},
    author={Wei-Lin Chiang and Lianmin Zheng and Ying Sheng and Anastasios Nikolas Angelopoulos and Tianle Li and Dacheng Li and Hao Zhang and Banghua Zhu and Michael Jordan and Joseph E. Gonzalez and Ion Stoica},
    year={2024},
    eprint={2403.04132},
    archivePrefix={arXiv},
    primaryClass={cs.AI}
}
```
"""
79
+
80
def build_leaderboard_tab(leaderboard_table_file):
    """Render the leaderboard UI inside the currently open Gradio context.

    Reads ``leaderboard_table_file`` as CSV; the table must contain
    ``Timeout`` and ``Dataset`` columns.  Two dropdowns select one value of
    each, and the dataframe shows the matching rows with those two columns
    removed.  Header, citation and acknowledgment Markdown are rendered
    around the table.

    Must be called inside a ``gr.Blocks`` context; returns ``None``.
    """
    gr.Markdown(make_default_md_1(), elem_id="leaderboard_markdown")
    gr.Markdown(make_default_md_2(), elem_id="leaderboard_markdown")

    df = pd.read_csv(leaderboard_table_file)

    def filter_leaderboard(timeout, dataset):
        """Return the rows for one (timeout, dataset) pair, minus those columns."""
        filtered_df = df[(df["Timeout"] == timeout) & (df["Dataset"] == dataset)]
        return filtered_df.drop(columns=["Timeout", "Dataset"])

    timeouts = df["Timeout"].unique().tolist()
    datasets = df["Dataset"].unique().tolist()

    # An empty table yields empty choice lists; fall back to no selection
    # instead of raising IndexError on ``[0]``.
    default_timeout = timeouts[0] if timeouts else None
    default_dataset = datasets[0] if datasets else None

    with gr.Tab("Leaderboard"):
        gr.Markdown(leaderboard_md, elem_id="leaderboard_markdown")
        with gr.Row():
            timeout_dropdown = gr.Dropdown(
                label="Timeout", choices=timeouts, value=default_timeout
            )
            dataset_dropdown = gr.Dropdown(
                label="Dataset", choices=datasets, value=default_dataset
            )

        initial_data = filter_leaderboard(default_timeout, default_dataset)
        leaderboard = gr.Dataframe(value=initial_data)

        def update_leaderboard(timeout, dataset):
            """Event handler: return the table for the current selection.

            Gradio writes a handler's *return value* into the output
            component, so the filtered frame has to be returned; calling
            ``leaderboard.update(...)`` and returning ``None`` never
            delivers the new rows to the table.
            """
            return filter_leaderboard(timeout, dataset)

        dropdown_inputs = [timeout_dropdown, dataset_dropdown]
        timeout_dropdown.change(update_leaderboard, dropdown_inputs, leaderboard)
        dataset_dropdown.change(update_leaderboard, dropdown_inputs, leaderboard)

    # NOTE(review): the nesting of the two Markdown blocks below (both inside
    # the "Citation" accordion, accordion outside the tab) was reconstructed
    # from a listing without indentation — confirm against the intended layout.
    with gr.Accordion("Citation", open=True):
        gr.Markdown(citation_md, elem_id="leaderboard_markdown")
        gr.Markdown(acknowledgment_md, elem_id="ack_markdown")
leaderboard_table_20240520.csv ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,Timeout,Dataset,ET,NET,MU,NMU,TMU,NTMU
2
+ OpenCodeInterpreter-DS-1.3B,10,HumanEval,0.2,0.86,57.24,1,6.63,0.84
3
+ OpenCodeInterpreter-DS-6.7B,10,HumanEval,0.21,0.98,58.83,1.06,6.79,0.99
4
+ OpenCodeInterpreter-DS-33B,10,HumanEval,0.21,0.95,59.9,1.05,7.05,0.94
5
+ deepseek-coder-1.3b-instruct,10,HumanEval,0.23,0.9,62.8,1,7.85,0.87
6
+ deepseek-coder-6.7b-instruct,10,HumanEval,0.22,0.76,59.57,1,7.34,0.77
7
+ deepseek-coder-33b-instruct,10,HumanEval,0.21,0.95,63.52,0.99,7.18,0.95
8
+ CodeLlama-7b-Instruct-hf,10,HumanEval,0.2,0.71,57.39,0.91,7.08,0.7
9
+ CodeLlama-13b-Instruct-hf,10,HumanEval,0.23,0.95,58.13,0.96,7.97,0.94
10
+ CodeLlama-34b-Instruct-hf,10,HumanEval,0.24,0.95,61.79,1.01,8.45,0.96
11
+ CodeLlama-70b-Instruct-hf,10,HumanEval,0.21,0.93,60.19,1.01,6.76,1.01
12
+ XwinCoder-13B,10,HumanEval,0.27,1.08,61.14,1.04,9.25,1.09
13
+ XwinCoder-34B,10,HumanEval,0.25,1.07,60.75,1.05,8.46,1.08
14
+ WizardCoder-Python-7B-V1.0-GPTQ,10,HumanEval,0.21,0.91,58.59,1.01,6.63,0.89
15
+ WizardCoder-Python-13B-V1.0-GPTQ,10,HumanEval,0.21,0.81,60.59,1,7.22,0.79
16
+ WizardCoder-Python-34B-V1.0-GPTQ,10,HumanEval,0.22,0.79,58.13,1,7.1,0.78
17
+ starcoder2-3b,10,HumanEval,0.24,1.02,62.45,1,7.73,0.89
18
+ starcoder2-7b,10,HumanEval,0.21,0.89,62.53,1,7.41,0.85
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ plotly
theme.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"theme": {"text_size": "20px", "background_fill_primary": "white", "background_fill_primary_dark": "*neutral_950", "background_fill_secondary": "*neutral_50", "background_fill_secondary_dark": "*neutral_900", "block_background_fill": "*background_fill_primary", "block_background_fill_dark": "*neutral_800", "block_border_color": "*border_color_primary", "block_border_color_dark": "*border_color_primary", "block_border_width": "1px", "block_border_width_dark": "1px", "block_info_text_color": "*body_text_color_subdued", "block_info_text_color_dark": "*body_text_color_subdued", "block_info_text_size": "*text_sm", "block_info_text_weight": "400", "block_label_background_fill": "*background_fill_primary", "block_label_background_fill_dark": "*background_fill_secondary", "block_label_border_color": "*border_color_primary", "block_label_border_color_dark": "*border_color_primary", "block_label_border_width": "1px", "block_label_border_width_dark": "1px", "block_label_margin": "0", "block_label_padding": "*spacing_sm *spacing_lg", "block_label_radius": "calc(*radius_lg - 1px) 0 calc(*radius_lg - 1px) 0", "block_label_right_radius": "0 calc(*radius_lg - 1px) 0 calc(*radius_lg - 1px)", "block_label_shadow": "*block_shadow", "block_label_text_color": "*neutral_500", "block_label_text_color_dark": "*neutral_200", "block_label_text_size": "*text_sm", "block_label_text_weight": "400", "block_padding": "*spacing_xl calc(*spacing_xl + 2px)", "block_radius": "*radius_lg", "block_shadow": "none", "block_shadow_dark": "none", "block_title_background_fill": "none", "block_title_background_fill_dark": "none", "block_title_border_color": "none", "block_title_border_color_dark": "none", "block_title_border_width": "0px", "block_title_border_width_dark": "0px", "block_title_padding": "0", "block_title_radius": "none", "block_title_text_color": "*neutral_500", "block_title_text_color_dark": "*neutral_200", "block_title_text_size": "*text_md", "block_title_text_weight": "400", 
"body_background_fill": "*background_fill_primary", "body_background_fill_dark": "*background_fill_primary", "body_text_color": "*neutral_700", "body_text_color_dark": "*neutral_200", "body_text_color_subdued": "*neutral_400", "body_text_color_subdued_dark": "*neutral_500", "body_text_size": "*text_md", "body_text_weight": "400", "border_color_accent": "*primary_300", "border_color_accent_dark": "*neutral_600", "border_color_primary": "*neutral_200", "border_color_primary_dark": "*neutral_700", "button_border_width": "*input_border_width", "button_border_width_dark": "*input_border_width", "button_cancel_background_fill": "*button_secondary_background_fill", "button_cancel_background_fill_dark": "*button_secondary_background_fill", "button_cancel_background_fill_hover": "*button_cancel_background_fill", "button_cancel_background_fill_hover_dark": "*button_cancel_background_fill", "button_cancel_border_color": "*button_secondary_border_color", "button_cancel_border_color_dark": "*button_secondary_border_color", "button_cancel_border_color_hover": "*button_cancel_border_color", "button_cancel_border_color_hover_dark": "*button_cancel_border_color", "button_cancel_text_color": "*button_secondary_text_color", "button_cancel_text_color_dark": "*button_secondary_text_color", "button_cancel_text_color_hover": "*button_cancel_text_color", "button_cancel_text_color_hover_dark": "*button_cancel_text_color", "button_large_padding": "*spacing_lg calc(2 * *spacing_lg)", "button_large_radius": "*radius_lg", "button_large_text_size": "*text_lg", "button_large_text_weight": "500", "button_primary_background_fill": "*primary_200", "button_primary_background_fill_dark": "*primary_700", "button_primary_background_fill_hover": "*button_primary_background_fill", "button_primary_background_fill_hover_dark": "*button_primary_background_fill", "button_primary_border_color": "*primary_200", "button_primary_border_color_dark": "*primary_600", "button_primary_border_color_hover": 
"*button_primary_border_color", "button_primary_border_color_hover_dark": "*button_primary_border_color", "button_primary_text_color": "*primary_600", "button_primary_text_color_dark": "white", "button_primary_text_color_hover": "*button_primary_text_color", "button_primary_text_color_hover_dark": "*button_primary_text_color", "button_secondary_background_fill": "*neutral_200", "button_secondary_background_fill_dark": "*neutral_600", "button_secondary_background_fill_hover": "*neutral_300", "button_secondary_background_fill_hover_dark": "*neutral_500", "button_secondary_border_color": "*neutral_200", "button_secondary_border_color_dark": "*neutral_600", "button_secondary_border_color_hover": "*button_secondary_border_color", "button_secondary_border_color_hover_dark": "*button_secondary_border_color", "button_secondary_text_color": "*neutral_700", "button_secondary_text_color_dark": "white", "button_secondary_text_color_hover": "*button_secondary_text_color", "button_secondary_text_color_hover_dark": "*button_secondary_text_color", "button_shadow": "none", "button_shadow_active": "none", "button_shadow_hover": "none", "button_small_padding": "*spacing_sm calc(2 * *spacing_sm)", "button_small_radius": "*radius_lg", "button_small_text_size": "*text_md", "button_small_text_weight": "400", "button_transition": "background-color 0.2s ease", "checkbox_background_color": "*background_fill_primary", "checkbox_background_color_dark": "*neutral_800", "checkbox_background_color_focus": "*checkbox_background_color", "checkbox_background_color_focus_dark": "*checkbox_background_color", "checkbox_background_color_hover": "*checkbox_background_color", "checkbox_background_color_hover_dark": "*checkbox_background_color", "checkbox_background_color_selected": "*secondary_600", "checkbox_background_color_selected_dark": "*secondary_600", "checkbox_border_color": "*neutral_300", "checkbox_border_color_dark": "*neutral_700", "checkbox_border_color_focus": "*secondary_500", 
"checkbox_border_color_focus_dark": "*secondary_500", "checkbox_border_color_hover": "*neutral_300", "checkbox_border_color_hover_dark": "*neutral_600", "checkbox_border_color_selected": "*secondary_600", "checkbox_border_color_selected_dark": "*secondary_600", "checkbox_border_radius": "*radius_sm", "checkbox_border_width": "*input_border_width", "checkbox_border_width_dark": "*input_border_width", "checkbox_check": "url(\"data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3cpath d='M12.207 4.793a1 1 0 010 1.414l-5 5a1 1 0 01-1.414 0l-2-2a1 1 0 011.414-1.414L6.5 9.086l4.293-4.293a1 1 0 011.414 0z'/%3e%3c/svg%3e\")", "checkbox_label_background_fill": "*button_secondary_background_fill", "checkbox_label_background_fill_dark": "*button_secondary_background_fill", "checkbox_label_background_fill_hover": "*button_secondary_background_fill_hover", "checkbox_label_background_fill_hover_dark": "*button_secondary_background_fill_hover", "checkbox_label_background_fill_selected": "*checkbox_label_background_fill", "checkbox_label_background_fill_selected_dark": "*checkbox_label_background_fill", "checkbox_label_border_color": "*border_color_primary", "checkbox_label_border_color_dark": "*border_color_primary", "checkbox_label_border_color_hover": "*checkbox_label_border_color", "checkbox_label_border_color_hover_dark": "*checkbox_label_border_color", "checkbox_label_border_width": "*input_border_width", "checkbox_label_border_width_dark": "*input_border_width", "checkbox_label_gap": "*spacing_lg", "checkbox_label_padding": "*spacing_md calc(2 * *spacing_md)", "checkbox_label_shadow": "none", "checkbox_label_text_color": "*body_text_color", "checkbox_label_text_color_dark": "*body_text_color", "checkbox_label_text_color_selected": "*checkbox_label_text_color", "checkbox_label_text_color_selected_dark": "*checkbox_label_text_color", "checkbox_label_text_size": "*text_md", "checkbox_label_text_weight": "400", "checkbox_shadow": 
"*input_shadow", "color_accent": "*primary_500", "color_accent_soft": "*primary_50", "color_accent_soft_dark": "*neutral_700", "container_radius": "*radius_lg", "embed_radius": "*radius_md", "error_background_fill": "#fee2e2", "error_background_fill_dark": "*background_fill_primary", "error_border_color": "#fecaca", "error_border_color_dark": "*border_color_primary", "error_border_width": "1px", "error_border_width_dark": "1px", "error_text_color": "#ef4444", "error_text_color_dark": "#ef4444", "form_gap_width": "0px", "input_background_fill": "*neutral_100", "input_background_fill_dark": "*neutral_700", "input_background_fill_focus": "*secondary_500", "input_background_fill_focus_dark": "*secondary_600", "input_background_fill_hover": "*input_background_fill", "input_background_fill_hover_dark": "*input_background_fill", "input_border_color": "*border_color_primary", "input_border_color_dark": "*border_color_primary", "input_border_color_focus": "*secondary_300", "input_border_color_focus_dark": "*neutral_700", "input_border_color_hover": "*input_border_color", "input_border_color_hover_dark": "*input_border_color", "input_border_width": "0px", "input_border_width_dark": "0px", "input_padding": "*spacing_xl", "input_placeholder_color": "*neutral_400", "input_placeholder_color_dark": "*neutral_500", "input_radius": "*radius_lg", "input_shadow": "none", "input_shadow_dark": "none", "input_shadow_focus": "*input_shadow", "input_shadow_focus_dark": "*input_shadow", "input_text_size": "*text_md", "input_text_weight": "400", "layout_gap": "*spacing_xxl", "link_text_color": "*secondary_600", "link_text_color_active": "*secondary_600", "link_text_color_active_dark": "*secondary_500", "link_text_color_dark": "*secondary_500", "link_text_color_hover": "*secondary_700", "link_text_color_hover_dark": "*secondary_400", "link_text_color_visited": "*secondary_500", "link_text_color_visited_dark": "*secondary_600", "loader_color": "*color_accent", "loader_color_dark": 
"*color_accent", "name": "base", "neutral_100": "#f5f5f4", "neutral_200": "#e7e5e4", "neutral_300": "#d6d3d1", "neutral_400": "#a8a29e", "neutral_50": "#fafaf9", "neutral_500": "#78716c", "neutral_600": "#57534e", "neutral_700": "#44403c", "neutral_800": "#292524", "neutral_900": "#1c1917", "neutral_950": "#0f0e0d", "panel_background_fill": "*background_fill_secondary", "panel_background_fill_dark": "*background_fill_secondary", "panel_border_color": "*border_color_primary", "panel_border_color_dark": "*border_color_primary", "panel_border_width": "0", "panel_border_width_dark": "0", "primary_100": "#e0f2fe", "primary_200": "#bae6fd", "primary_300": "#7dd3fc", "primary_400": "#38bdf8", "primary_50": "#f0f9ff", "primary_500": "#0ea5e9", "primary_600": "#0284c7", "primary_700": "#0369a1", "primary_800": "#075985", "primary_900": "#0c4a6e", "primary_950": "#0b4165", "prose_header_text_weight": "500", "prose_text_size": "*text_md", "prose_text_weight": "400", "radio_circle": "url(\"data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='8' cy='8' r='3'/%3e%3c/svg%3e\")", "radius_lg": "3px", "radius_md": "3px", "radius_sm": "3px", "radius_xl": "3px", "radius_xs": "3px", "radius_xxl": "3px", "radius_xxs": "3px", "secondary_100": "#e0f2fe", "secondary_200": "#bae6fd", "secondary_300": "#7dd3fc", "secondary_400": "#38bdf8", "secondary_50": "#f0f9ff", "secondary_500": "#0ea5e9", "secondary_600": "#0284c7", "secondary_700": "#0369a1", "secondary_800": "#075985", "secondary_900": "#0c4a6e", "secondary_950": "#0b4165", "section_header_text_size": "*text_md", "section_header_text_weight": "400", "shadow_drop": "rgba(0,0,0,0.05) 0px 1px 2px 0px", "shadow_drop_lg": "0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1)", "shadow_inset": "rgba(0,0,0,0.05) 0px 2px 4px 0px inset", "shadow_spread": "3px", "shadow_spread_dark": "1px", "slider_color": "*primary_600", "slider_color_dark": "*primary_600", "spacing_lg": "8px", 
"spacing_md": "6px", "spacing_sm": "4px", "spacing_xl": "10px", "spacing_xs": "2px", "spacing_xxl": "16px", "spacing_xxs": "1px", "stat_background_fill": "*primary_300", "stat_background_fill_dark": "*primary_500", "table_border_color": "*neutral_300", "table_border_color_dark": "*neutral_700", "table_even_background_fill": "white", "table_even_background_fill_dark": "*neutral_950", "table_odd_background_fill": "*neutral_50", "table_odd_background_fill_dark": "*neutral_900", "table_radius": "*radius_lg", "table_row_focus": "*color_accent_soft", "table_row_focus_dark": "*color_accent_soft", "text_lg": "20px", "text_md": "16px", "text_sm": "14px", "text_xl": "24px", "text_xs": "12px", "text_xxl": "28px", "text_xxs": "10px"}, "version": "0.0.1"}
utils.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Common utilities.
3
+ """
4
+ from asyncio import AbstractEventLoop
5
+ from io import BytesIO
6
+ import base64
7
+ import json
8
+ import logging
9
+ import logging.handlers
10
+ import os
11
+ import platform
12
+ import sys
13
+ import time
14
+ from typing import AsyncGenerator, Generator
15
+ import warnings
16
+
17
+ import requests
18
+
19
# Directory that build_logger() writes log files into.
# Setting this to "" makes build_logger() skip file logging entirely.
LOGDIR = "./"


# File handler most recently created by build_logger(); None until one exists.
handler = None
# Loggers that already had a file handler attached by build_logger(), so that
# repeated calls do not attach a second handler to the same logger.
visited_loggers = set()
24
+
25
+
26
def build_logger(logger_name, logger_filename):
    """Configure process-wide logging and return the logger ``logger_name``.

    Side effects, in order:

    * Ensures the root logger has a handler (via ``logging.basicConfig`` at
      INFO level) and installs a common formatter on the first root handler.
    * Replaces ``sys.stdout`` and ``sys.stderr`` with ``StreamToLogger``
      objects feeding the ``"stdout"`` (INFO) and ``"stderr"`` (ERROR)
      loggers.
    * Raises the ``"httpx"`` logger threshold to WARNING.
    * When ``LOGDIR`` is non-empty, attaches a daily-rotating UTF-8 file
      handler writing to ``LOGDIR/logger_filename`` to every one of the
      stdout, stderr and named loggers that has not received a file handler
      from an earlier call.

    Args:
        logger_name: Name passed to ``logging.getLogger``.
        logger_filename: File name (joined onto ``LOGDIR``) for the log file.

    Returns:
        The ``logging.Logger`` registered under ``logger_name``, set to INFO.
    """
    global handler

    formatter = logging.Formatter(
        fmt="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    # Make sure the root logger has a handler, then set its format.
    root_logger = logging.getLogger()
    if not root_logger.handlers:
        # Compare the whole version tuple: checking only the minor number
        # would misjudge any interpreter whose major version is not 3.
        if sys.version_info >= (3, 9):
            # basicConfig() accepts ``encoding`` from Python 3.9 on
            # (original note: this is for Windows).
            logging.basicConfig(level=logging.INFO, encoding="utf-8")
        else:
            if platform.system() == "Windows":
                warnings.warn(
                    "If you are running on Windows, "
                    "we recommend you use Python >= 3.9 for UTF-8 encoding."
                )
            logging.basicConfig(level=logging.INFO)
    root_logger.handlers[0].setFormatter(formatter)

    # Redirect stdout and stderr to loggers.
    stdout_logger = logging.getLogger("stdout")
    stdout_logger.setLevel(logging.INFO)
    sys.stdout = StreamToLogger(stdout_logger, logging.INFO)

    stderr_logger = logging.getLogger("stderr")
    stderr_logger.setLevel(logging.ERROR)
    sys.stderr = StreamToLogger(stderr_logger, logging.ERROR)

    # The logger handed back to the caller.
    logger = logging.getLogger(logger_name)
    logger.setLevel(logging.INFO)

    # Avoid httpx flooding POST logs.
    logging.getLogger("httpx").setLevel(logging.WARNING)

    # If LOGDIR is empty, do not try to write logs to a local file.
    if LOGDIR != "":
        targets = [stdout_logger, stderr_logger, logger]
        # Only open a log file when some logger still needs a handler;
        # otherwise every repeated call would leak an unattached, open
        # TimedRotatingFileHandler.
        if any(target not in visited_loggers for target in targets):
            os.makedirs(LOGDIR, exist_ok=True)
            filename = os.path.join(LOGDIR, logger_filename)
            handler = logging.handlers.TimedRotatingFileHandler(
                filename, when="D", utc=True, encoding="utf-8"
            )
            handler.setFormatter(formatter)

            for target in targets:
                # Re-check inside the loop: ``logger`` may be the same object
                # as the stdout/stderr logger and must get the handler once.
                if target in visited_loggers:
                    continue
                visited_loggers.add(target)
                target.addHandler(handler)

    return logger
82
+
83
class StreamToLogger(object):
    """
    Fake file-like stream object that redirects writes to a logger instance.

    Text is buffered until a ``"\\n"`` arrives; each completed line is sent to
    ``logger`` at ``log_level``. Any attribute this class does not define is
    looked up on ``terminal``, the ``sys.stdout`` object that was current when
    the instance was created.
    """

    def __init__(self, logger, log_level=logging.INFO):
        """Remember the target logger/level and start with an empty buffer.

        Args:
            logger: ``logging.Logger`` that receives the written lines.
            log_level: Level used for every record emitted by this stream.
        """
        # Captured at construction time; used only for attribute fallback.
        self.terminal = sys.stdout
        self.logger = logger
        self.log_level = log_level
        # Text received so far that has not been terminated by "\n".
        self.linebuf = ""

    def __getattr__(self, attr):
        """Delegate unknown attributes to the captured terminal stream.

        Raises:
            AttributeError: if ``terminal`` itself is missing (e.g. on an
                instance created without ``__init__``), instead of recursing
                until ``RecursionError``.
        """
        if attr == "terminal":
            raise AttributeError(attr)
        return getattr(self.terminal, attr)

    def write(self, buf):
        """Buffer ``buf`` and log every line completed by a ``"\\n"``.

        From the io.TextIOWrapper docs: on output, if newline is None, any
        '\\n' characters written are translated to the system default line
        separator. sys.stdout.write() therefore expects '\\n' newlines, so
        splitting on '\\n' alone is cross platform.

        Splitting on '\\n' only (rather than on every boundary recognised by
        ``str.splitlines``) keeps text containing '\\r', form feeds, etc. in
        its original order instead of logging later text ahead of it.

        Args:
            buf: Text to write.

        Returns:
            The number of characters in ``buf``, as text streams do.
        """
        pending = self.linebuf + buf
        # Everything before the last "\n" is complete; the rest stays buffered.
        *complete_lines, self.linebuf = pending.split("\n")
        for line in complete_lines:
            # Drop characters that cannot be encoded as UTF-8.
            encoded_message = line.encode("utf-8", "ignore").decode("utf-8")
            self.logger.log(self.log_level, encoded_message.rstrip())
        return len(buf)

    def flush(self):
        """Log any buffered partial line and clear the buffer."""
        if self.linebuf != "":
            encoded_message = self.linebuf.encode("utf-8", "ignore").decode("utf-8")
            self.logger.log(self.log_level, encoded_message.rstrip())
        self.linebuf = ""