brunneis committed on
Commit
9a0321d
·
unverified ·
1 Parent(s): 2b85454
.gitignore CHANGED
@@ -1,4 +1,4 @@
1
- solbench/
2
  ignore/
3
  auto_evals/
4
  venv/
 
1
+ soliditybench/
2
  ignore/
3
  auto_evals/
4
  venv/
README.md CHANGED
@@ -7,11 +7,11 @@ sdk: gradio
7
  app_file: app.py
8
  pinned: true
9
  datasets:
10
- - braindao/solbench-naive-judge-openzeppelin-v1
11
  - braindao/humaneval-for-solidity-25
12
  license: apache-2.0
13
  sdk_version: 4.40.0
14
  thumbnail: >-
15
  https://cdn-uploads.huggingface.co/production/uploads/5f19edf678d261307936f4c8/4v6TPbN8qa6JptyCFUy-J.png
16
- short_description: Solbench Leaderboard
17
  ---
 
7
  app_file: app.py
8
  pinned: true
9
  datasets:
10
+ - braindao/soliditybench-naive-judge-openzeppelin-v1
11
  - braindao/humaneval-for-solidity-25
12
  license: apache-2.0
13
  sdk_version: 4.40.0
14
  thumbnail: >-
15
  https://cdn-uploads.huggingface.co/production/uploads/5f19edf678d261307936f4c8/4v6TPbN8qa6JptyCFUy-J.png
16
+ short_description: SolidityBench Leaderboard
17
  ---
app.py CHANGED
@@ -254,5 +254,5 @@ scheduler.add_job(restart_space, "interval", seconds=900)
254
  scheduler.start()
255
  demo.queue(default_concurrency_limit=40).launch(
256
  server_name="0.0.0.0",
257
- allowed_paths=["images/solbench.svg"],
258
  )
 
254
  scheduler.start()
255
  demo.queue(default_concurrency_limit=40).launch(
256
  server_name="0.0.0.0",
257
+ allowed_paths=["images/soliditybench.svg"],
258
  )
images/{solbench.svg → soliditybench.svg} RENAMED
File without changes
soliditybench.svg ADDED
src/about.py CHANGED
@@ -29,7 +29,7 @@ class Tasks(Enum):
29
  # ---------------------------------------------------
30
 
31
  # Your leaderboard name
32
- TITLE = """<br><img src="file/images/solbench.svg" width="500" style="display: block; margin-left: auto; margin-right: auto;">
33
  <h2 align="center" id="space-title">Solidity Leaderboard | Powered by IQ</h2>"""
34
 
35
  # What does your leaderboard evaluate?
 
29
  # ---------------------------------------------------
30
 
31
  # Your leaderboard name
32
+ TITLE = """<br><img src="file/images/soliditybench.svg" width="500" style="display: block; margin-left: auto; margin-right: auto;">
33
  <h2 align="center" id="space-title">Solidity Leaderboard | Powered by IQ</h2>"""
34
 
35
  # What does your leaderboard evaluate?
src/display/utils.py CHANGED
@@ -38,7 +38,7 @@ auto_eval_column_dict = [
38
  "", "str", True, never_hidden=True)),
39
  ("model", ColumnContent, create_column_content(
40
  "Model", "markdown", True, never_hidden=True)),
41
- ("solbench", ColumnContent, create_column_content("Score", "number", True)),
42
  # ("average", ColumnContent, create_column_content("Average", "number", True)),
43
  ]
44
 
 
38
  "", "str", True, never_hidden=True)),
39
  ("model", ColumnContent, create_column_content(
40
  "Model", "markdown", True, never_hidden=True)),
41
+ ("soliditybench", ColumnContent, create_column_content("Score", "number", True)),
42
  # ("average", ColumnContent, create_column_content("Average", "number", True)),
43
  ]
44
 
src/envs.py CHANGED
@@ -18,7 +18,7 @@ REQUESTS_REPO = os.environ.get("REQUESTS_REPO")
18
  RESULTS_REPO = os.environ.get("RESULTS_REPO")
19
 
20
  # If you setup a cache later, just change HF_HOME
21
- CACHE_PATH = os.path.join(os.getenv("HF_HOME", "."), "solbench")
22
 
23
  # Local caches
24
  EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "requests")
 
18
  RESULTS_REPO = os.environ.get("RESULTS_REPO")
19
 
20
  # If you setup a cache later, just change HF_HOME
21
+ CACHE_PATH = os.path.join(os.getenv("HF_HOME", "."), "soliditybench")
22
 
23
  # Local caches
24
  EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "requests")
src/leaderboard/read_evals.py CHANGED
@@ -127,7 +127,7 @@ class EvalResult:
127
  'human_eval_solidity_pass_3': self.results.get('human_eval_solidity_pass_3', 0)
128
  }
129
 
130
- solbench = 0
131
  non_zero_scores = {k: v for k, v in scores.items() if v != 0}
132
  if non_zero_scores:
133
  weights = {
@@ -136,7 +136,7 @@ class EvalResult:
136
  'human_eval_solidity_pass_3': 0.2
137
  }
138
  total_weight = sum(weights[k] for k in non_zero_scores)
139
- solbench = sum(scores[k] * weights[k] / total_weight for k in non_zero_scores)
140
 
141
  data_dict = {
142
  "eval_name": self.eval_name, # not a column, just a save name,
@@ -148,7 +148,7 @@ class EvalResult:
148
  AutoEvalColumn.model.name: make_clickable_model(self.model_name),
149
  AutoEvalColumn.revision.name: self.revision,
150
  # AutoEvalColumn.average.name: average,
151
- AutoEvalColumn.solbench.name: solbench,
152
  AutoEvalColumn.license.name: self.license,
153
  AutoEvalColumn.likes.name: self.likes,
154
  AutoEvalColumn.params.name: self.num_params,
 
127
  'human_eval_solidity_pass_3': self.results.get('human_eval_solidity_pass_3', 0)
128
  }
129
 
130
+ soliditybench = 0
131
  non_zero_scores = {k: v for k, v in scores.items() if v != 0}
132
  if non_zero_scores:
133
  weights = {
 
136
  'human_eval_solidity_pass_3': 0.2
137
  }
138
  total_weight = sum(weights[k] for k in non_zero_scores)
139
+ soliditybench = sum(scores[k] * weights[k] / total_weight for k in non_zero_scores)
140
 
141
  data_dict = {
142
  "eval_name": self.eval_name, # not a column, just a save name,
 
148
  AutoEvalColumn.model.name: make_clickable_model(self.model_name),
149
  AutoEvalColumn.revision.name: self.revision,
150
  # AutoEvalColumn.average.name: average,
151
+ AutoEvalColumn.soliditybench.name: soliditybench,
152
  AutoEvalColumn.license.name: self.license,
153
  AutoEvalColumn.likes.name: self.likes,
154
  AutoEvalColumn.params.name: self.num_params,
src/populate.py CHANGED
@@ -20,7 +20,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
20
 
21
  df = pd.DataFrame.from_records(all_data_json)
22
  # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
23
- df = df.sort_values(by=[AutoEvalColumn.solbench.name], ascending=False)
24
  df = df[cols].round(decimals=2)
25
 
26
  # filter out if any of the benchmarks have not been produced
 
20
 
21
  df = pd.DataFrame.from_records(all_data_json)
22
  # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
23
+ df = df.sort_values(by=[AutoEvalColumn.soliditybench.name], ascending=False)
24
  df = df[cols].round(decimals=2)
25
 
26
  # filter out if any of the benchmarks have not been produced