Rename
Browse files
- .gitignore +1 -1
- README.md +2 -2
- app.py +1 -1
- images/{solbench.svg → soliditybench.svg} +67 -45
- soliditybench.svg +105 -0
- src/about.py +1 -1
- src/display/utils.py +1 -1
- src/envs.py +1 -1
- src/leaderboard/read_evals.py +3 -3
- src/populate.py +1 -1
.gitignore
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
|
2 |
ignore/
|
3 |
auto_evals/
|
4 |
venv/
|
|
|
1 |
+
soliditybench/
|
2 |
ignore/
|
3 |
auto_evals/
|
4 |
venv/
|
README.md
CHANGED
@@ -7,11 +7,11 @@ sdk: gradio
|
|
7 |
app_file: app.py
|
8 |
pinned: true
|
9 |
datasets:
|
10 |
-
- braindao/
|
11 |
- braindao/humaneval-for-solidity-25
|
12 |
license: apache-2.0
|
13 |
sdk_version: 4.40.0
|
14 |
thumbnail: >-
|
15 |
https://cdn-uploads.huggingface.co/production/uploads/5f19edf678d261307936f4c8/4v6TPbN8qa6JptyCFUy-J.png
|
16 |
-
short_description:
|
17 |
---
|
|
|
7 |
app_file: app.py
|
8 |
pinned: true
|
9 |
datasets:
|
10 |
+
- braindao/soliditybench-naive-judge-openzeppelin-v1
|
11 |
- braindao/humaneval-for-solidity-25
|
12 |
license: apache-2.0
|
13 |
sdk_version: 4.40.0
|
14 |
thumbnail: >-
|
15 |
https://cdn-uploads.huggingface.co/production/uploads/5f19edf678d261307936f4c8/4v6TPbN8qa6JptyCFUy-J.png
|
16 |
+
short_description: SolidityBench Leaderboard
|
17 |
---
|
app.py
CHANGED
@@ -254,5 +254,5 @@ scheduler.add_job(restart_space, "interval", seconds=900)
|
|
254 |
scheduler.start()
|
255 |
demo.queue(default_concurrency_limit=40).launch(
|
256 |
server_name="0.0.0.0",
|
257 |
-
allowed_paths=["images/
|
258 |
)
|
|
|
254 |
scheduler.start()
|
255 |
demo.queue(default_concurrency_limit=40).launch(
|
256 |
server_name="0.0.0.0",
|
257 |
+
allowed_paths=["images/soliditybench.svg"],
|
258 |
)
|
images/{solbench.svg → soliditybench.svg}
RENAMED
File without changes
|
soliditybench.svg
ADDED
src/about.py
CHANGED
@@ -29,7 +29,7 @@ class Tasks(Enum):
|
|
29 |
# ---------------------------------------------------
|
30 |
|
31 |
# Your leaderboard name
|
32 |
-
TITLE = """<br><img src="file/images/
|
33 |
<h2 align="center" id="space-title">Solidity Leaderboard | Powered by IQ</h2>"""
|
34 |
|
35 |
# What does your leaderboard evaluate?
|
|
|
29 |
# ---------------------------------------------------
|
30 |
|
31 |
# Your leaderboard name
|
32 |
+
TITLE = """<br><img src="file/images/soliditybench.svg" width="500" style="display: block; margin-left: auto; margin-right: auto;">
|
33 |
<h2 align="center" id="space-title">Solidity Leaderboard | Powered by IQ</h2>"""
|
34 |
|
35 |
# What does your leaderboard evaluate?
|
src/display/utils.py
CHANGED
@@ -38,7 +38,7 @@ auto_eval_column_dict = [
|
|
38 |
"", "str", True, never_hidden=True)),
|
39 |
("model", ColumnContent, create_column_content(
|
40 |
"Model", "markdown", True, never_hidden=True)),
|
41 |
-
("
|
42 |
# ("average", ColumnContent, create_column_content("Average", "number", True)),
|
43 |
]
|
44 |
|
|
|
38 |
"", "str", True, never_hidden=True)),
|
39 |
("model", ColumnContent, create_column_content(
|
40 |
"Model", "markdown", True, never_hidden=True)),
|
41 |
+
("soliditybench", ColumnContent, create_column_content("Score", "number", True)),
|
42 |
# ("average", ColumnContent, create_column_content("Average", "number", True)),
|
43 |
]
|
44 |
|
src/envs.py
CHANGED
@@ -18,7 +18,7 @@ REQUESTS_REPO = os.environ.get("REQUESTS_REPO")
|
|
18 |
RESULTS_REPO = os.environ.get("RESULTS_REPO")
|
19 |
|
20 |
# If you setup a cache later, just change HF_HOME
|
21 |
-
CACHE_PATH = os.path.join(os.getenv("HF_HOME", "."), "
|
22 |
|
23 |
# Local caches
|
24 |
EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "requests")
|
|
|
18 |
RESULTS_REPO = os.environ.get("RESULTS_REPO")
|
19 |
|
20 |
# If you setup a cache later, just change HF_HOME
|
21 |
+
CACHE_PATH = os.path.join(os.getenv("HF_HOME", "."), "soliditybench")
|
22 |
|
23 |
# Local caches
|
24 |
EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "requests")
|
src/leaderboard/read_evals.py
CHANGED
@@ -127,7 +127,7 @@ class EvalResult:
|
|
127 |
'human_eval_solidity_pass_3': self.results.get('human_eval_solidity_pass_3', 0)
|
128 |
}
|
129 |
|
130 |
-
|
131 |
non_zero_scores = {k: v for k, v in scores.items() if v != 0}
|
132 |
if non_zero_scores:
|
133 |
weights = {
|
@@ -136,7 +136,7 @@ class EvalResult:
|
|
136 |
'human_eval_solidity_pass_3': 0.2
|
137 |
}
|
138 |
total_weight = sum(weights[k] for k in non_zero_scores)
|
139 |
-
|
140 |
|
141 |
data_dict = {
|
142 |
"eval_name": self.eval_name, # not a column, just a save name,
|
@@ -148,7 +148,7 @@ class EvalResult:
|
|
148 |
AutoEvalColumn.model.name: make_clickable_model(self.model_name),
|
149 |
AutoEvalColumn.revision.name: self.revision,
|
150 |
# AutoEvalColumn.average.name: average,
|
151 |
-
AutoEvalColumn.
|
152 |
AutoEvalColumn.license.name: self.license,
|
153 |
AutoEvalColumn.likes.name: self.likes,
|
154 |
AutoEvalColumn.params.name: self.num_params,
|
|
|
127 |
'human_eval_solidity_pass_3': self.results.get('human_eval_solidity_pass_3', 0)
|
128 |
}
|
129 |
|
130 |
+
soliditybench = 0
|
131 |
non_zero_scores = {k: v for k, v in scores.items() if v != 0}
|
132 |
if non_zero_scores:
|
133 |
weights = {
|
|
|
136 |
'human_eval_solidity_pass_3': 0.2
|
137 |
}
|
138 |
total_weight = sum(weights[k] for k in non_zero_scores)
|
139 |
+
soliditybench = sum(scores[k] * weights[k] / total_weight for k in non_zero_scores)
|
140 |
|
141 |
data_dict = {
|
142 |
"eval_name": self.eval_name, # not a column, just a save name,
|
|
|
148 |
AutoEvalColumn.model.name: make_clickable_model(self.model_name),
|
149 |
AutoEvalColumn.revision.name: self.revision,
|
150 |
# AutoEvalColumn.average.name: average,
|
151 |
+
AutoEvalColumn.soliditybench.name: soliditybench,
|
152 |
AutoEvalColumn.license.name: self.license,
|
153 |
AutoEvalColumn.likes.name: self.likes,
|
154 |
AutoEvalColumn.params.name: self.num_params,
|
src/populate.py
CHANGED
@@ -20,7 +20,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
20 |
|
21 |
df = pd.DataFrame.from_records(all_data_json)
|
22 |
# df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
|
23 |
-
df = df.sort_values(by=[AutoEvalColumn.
|
24 |
df = df[cols].round(decimals=2)
|
25 |
|
26 |
# filter out if any of the benchmarks have not been produced
|
|
|
20 |
|
21 |
df = pd.DataFrame.from_records(all_data_json)
|
22 |
# df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
|
23 |
+
df = df.sort_values(by=[AutoEvalColumn.soliditybench.name], ascending=False)
|
24 |
df = df[cols].round(decimals=2)
|
25 |
|
26 |
# filter out if any of the benchmarks have not been produced
|