seonglae-holistic committed
Commit 124bec5 · 1 Parent(s): 93c138a

refactor: migrate to pydantic model

Files changed (4)
  1. app.py +23 -24
  2. src/display/utils.py +68 -52
  3. src/leaderboard/read_evals.py +41 -39
  4. src/populate.py +4 -5
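
At its core, the refactor swaps the dynamically built AutoEvalColumn class (a dict of ColumnContent dataclasses turned into a type at runtime) for explicit pydantic models, and derives column lists by iterating the model schema. A minimal sketch of that pattern, trimmed to two fields for illustration (the full field set and display values live in src/display/utils.py below):

from pydantic import BaseModel

class ColumnContent(BaseModel):
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

class AutoEvalColumn(BaseModel):
    library: ColumnContent
    stars: ColumnContent

# One validated instance holds the display metadata.
auto_eval_column_attrs = AutoEvalColumn(
    library=ColumnContent(name="Library", type="markdown", displayed_by_default=True, never_hidden=True),
    stars=ColumnContent(name="GitHub ⭐", type="number", displayed_by_default=False),
)

# Column lists are built from the schema (model_fields) plus the instance values,
# replacing the old fields() helper that scraped the class __dict__.
COLS = [
    getattr(auto_eval_column_attrs, field).name
    for field in AutoEvalColumn.model_fields
    if not getattr(auto_eval_column_attrs, field).hidden
]

Reading names off a validated instance keeps the column metadata type-checked, while callers iterate AutoEvalColumn.model_fields instead of relying on attribute scraping.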
app.py CHANGED
@@ -1,10 +1,10 @@
 import gradio as gr
+from gradio.components import Dataframe
 from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from huggingface_hub import snapshot_download
 import os
-import shutil
 
 from src.about import (
     CITATION_BUTTON_LABEL,
@@ -22,10 +22,9 @@ from src.display.utils import (
     EVAL_COLS,
     EVAL_TYPES,
     AutoEvalColumn,
+    auto_eval_column_attrs,
     LibraryType,
-    fields,
     Language,
-    AssessmentStatus
 )
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN, LOCAL_MODE
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
@@ -94,34 +93,34 @@ def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:
         # Create an empty dataframe with the expected columns
         all_columns = COLS + [task.value.col_name for task in Tasks]
-        empty_df = pd.DataFrame(columns=all_columns)
+        empty_df = pd.DataFrame(columns=pd.Index(all_columns))
         print("Warning: Leaderboard DataFrame is empty. Using empty dataframe.")
         dataframe = empty_df
-
-    return Leaderboard(
-        value=dataframe,
-        datatype=[c.type for c in fields(AutoEvalColumn)],
-        select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
-            label="Select Columns to Display:",
-        ),
-        search_columns=[AutoEvalColumn.library.name, AutoEvalColumn.license_name.name],
-        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
-        filter_columns=[
-            ColumnFilter(AutoEvalColumn.library_type.name, type="checkboxgroup", label="Library types"),
-            ColumnFilter(AutoEvalColumn.language.name, type="checkboxgroup", label="Programming Language"),
+    filter_columns = [
+        ColumnFilter(auto_eval_column_attrs.library_type.name, type="checkboxgroup", label="Library types"),
+        ColumnFilter(auto_eval_column_attrs.language.name, type="checkboxgroup", label="Programming Language"),
         ColumnFilter(
-            AutoEvalColumn.stars.name,
+            auto_eval_column_attrs.stars.name,
             type="slider",
             min=0,
             max=50000,
            label="GitHub Stars",
         ),
         ColumnFilter(
-            AutoEvalColumn.availability.name, type="boolean", label="Show only active libraries", default=True
+            auto_eval_column_attrs.availability.name, type="boolean", label="Show only active libraries"  # type: ignore
         ),
-    ],
+    ]
+    return Leaderboard(
+        value=dataframe,
+        datatype=[getattr(auto_eval_column_attrs, field).type for field in AutoEvalColumn.model_fields],
+        select_columns=SelectColumns(
+            default_selection=[getattr(auto_eval_column_attrs, field).name for field in AutoEvalColumn.model_fields if getattr(auto_eval_column_attrs, field).displayed_by_default],
+            cant_deselect=[getattr(auto_eval_column_attrs, field).name for field in AutoEvalColumn.model_fields if getattr(auto_eval_column_attrs, field).never_hidden],
+            label="Select Columns to Display:",
+        ),
+        search_columns=[auto_eval_column_attrs.library.name, auto_eval_column_attrs.license_name.name],
+        hide_columns=[getattr(auto_eval_column_attrs, field).name for field in AutoEvalColumn.model_fields if getattr(auto_eval_column_attrs, field).hidden],
+        filter_columns=filter_columns,  # type: ignore
         bool_checkboxgroup_label="Filter libraries",
         interactive=False,
     )
@@ -150,7 +149,7 @@ with demo:
                     open=False,
                 ):
                     with gr.Row():
-                        finished_eval_table = gr.components.Dataframe(
+                        finished_eval_table = Dataframe(
                             value=finished_eval_queue_df,
                             headers=EVAL_COLS,
                             datatype=EVAL_TYPES,
@@ -161,7 +160,7 @@ with demo:
                    open=False,
                ):
                    with gr.Row():
-                        running_eval_table = gr.components.Dataframe(
+                        running_eval_table = Dataframe(
                            value=running_eval_queue_df,
                            headers=EVAL_COLS,
                            datatype=EVAL_TYPES,
@@ -173,7 +172,7 @@ with demo:
                    open=False,
                ):
                    with gr.Row():
-                        pending_eval_table = gr.components.Dataframe(
+                        pending_eval_table = Dataframe(
                            value=pending_eval_queue_df,
                            headers=EVAL_COLS,
                            datatype=EVAL_TYPES,
src/display/utils.py CHANGED
@@ -2,16 +2,13 @@ from dataclasses import dataclass
 from enum import Enum
 
 from src.about import Tasks
-
-def fields(raw_class):
-    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
+from pydantic import BaseModel
 
 
 # These classes are for user facing column names,
 # to avoid having to change them all around the code
 # when a modif is needed
-@dataclass
-class ColumnContent:
+class ColumnContent(BaseModel):
     name: str
     type: str
     displayed_by_default: bool
@@ -19,42 +16,61 @@ class ColumnContent:
     never_hidden: bool = False
 
 ## Leaderboard columns
-# Create a dictionary to hold the class attributes
-auto_eval_column_attrs = {}
-
-# Init
-auto_eval_column_attrs["library_type_symbol"] = ColumnContent("T", "str", True, never_hidden=True)
-auto_eval_column_attrs["library"] = ColumnContent("Library", "markdown", True, never_hidden=True)
-
-# Scores
-auto_eval_column_attrs["overall_risk"] = ColumnContent("Trust Score ⬇️", "number", True)
-for task in Tasks:
-    auto_eval_column_attrs[task.name] = ColumnContent(task.value.col_name, "number", True)
-
-# Library information
-auto_eval_column_attrs["library_type"] = ColumnContent("Type", "str", False)
-auto_eval_column_attrs["framework"] = ColumnContent("Framework", "str", False)
-auto_eval_column_attrs["version"] = ColumnContent("Version", "str", False, False)
-auto_eval_column_attrs["language"] = ColumnContent("Language", "str", False)
-auto_eval_column_attrs["license_name"] = ColumnContent("License", "str", True)
-auto_eval_column_attrs["stars"] = ColumnContent("GitHub ⭐", "number", False)
-auto_eval_column_attrs["last_update"] = ColumnContent("Last Updated", "str", False)
-auto_eval_column_attrs["verified"] = ColumnContent("Independently Verified", "bool", False)
-auto_eval_column_attrs["availability"] = ColumnContent("Active Maintenance", "bool", True)
-auto_eval_column_attrs["report_url"] = ColumnContent("Report", "str", True)
-
-# Create the dataclass with class attributes
-AutoEvalColumn = type("AutoEvalColumn", (), auto_eval_column_attrs)
+class AutoEvalColumn(BaseModel):
+    library_type_symbol: ColumnContent
+    library: ColumnContent
+    overall_risk: ColumnContent
+    # Task columns
+    license: ColumnContent
+    security: ColumnContent
+    maintenance: ColumnContent
+    dependency: ColumnContent
+    regulatory: ColumnContent
+    # Library information
+    library_type: ColumnContent
+    framework: ColumnContent
+    version: ColumnContent
+    language: ColumnContent
+    license_name: ColumnContent
+    stars: ColumnContent
+    availability: ColumnContent
+    report_url: ColumnContent
+    last_update: ColumnContent
+    verified: ColumnContent
+
+auto_eval_column_attrs = AutoEvalColumn(
+    library_type_symbol=ColumnContent(name="T", type="str", displayed_by_default=True, never_hidden=True),
+    library=ColumnContent(name="Library", type="markdown", displayed_by_default=True, never_hidden=True),
+    overall_risk=ColumnContent(name="Trust Score", type="number", displayed_by_default=True),
+    # Task columns from Tasks enum
+    license=ColumnContent(name="License Risk", type="number", displayed_by_default=True),
+    security=ColumnContent(name="Security Risk", type="number", displayed_by_default=True),
+    maintenance=ColumnContent(name="Maintenance Risk", type="number", displayed_by_default=True),
+    dependency=ColumnContent(name="Dependency Risk", type="number", displayed_by_default=True),
+    regulatory=ColumnContent(name="Regulatory Risk", type="number", displayed_by_default=True),
+    # Library information
+    library_type=ColumnContent(name="Type", type="str", displayed_by_default=False),
+    framework=ColumnContent(name="Framework", type="str", displayed_by_default=False),
+    version=ColumnContent(name="Version", type="str", displayed_by_default=False, hidden=True),
+    language=ColumnContent(name="Language", type="str", displayed_by_default=False),
+    license_name=ColumnContent(name="License", type="str", displayed_by_default=True),
+    stars=ColumnContent(name="GitHub ⭐", type="number", displayed_by_default=False),
+    availability=ColumnContent(name="Active Maintenance", type="bool", displayed_by_default=True),
+    report_url=ColumnContent(name="Report", type="markdown", displayed_by_default=True),
+    last_update=ColumnContent(name="Last Update", type="str", displayed_by_default=False),
+    verified=ColumnContent(name="Verified", type="bool", displayed_by_default=False),
+)
+
 
 ## For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn:  # Queue column
-    library = ColumnContent("library", "markdown", True)
-    version = ColumnContent("version", "str", True)
-    language = ColumnContent("language", "str", True)
-    framework = ColumnContent("framework", "str", True)
-    library_type = ColumnContent("library_type", "str", True)
-    status = ColumnContent("status", "str", True)
+    library = ColumnContent(name="library", type="markdown", displayed_by_default=True)
+    version = ColumnContent(name="version", type="str", displayed_by_default=True)
+    language = ColumnContent(name="language", type="str", displayed_by_default=True)
+    framework = ColumnContent(name="framework", type="str", displayed_by_default=True)
+    library_type = ColumnContent(name="library_type", type="str", displayed_by_default=True)
+    status = ColumnContent(name="status", type="str", displayed_by_default=True)
 
 ## All the library information that we might need
 @dataclass
@@ -65,27 +81,27 @@ class LibraryDetails:
 
 
 class LibraryType(Enum):
-    ML = LibraryDetails(name="machine learning", symbol="🟢")
-    LLM = LibraryDetails(name="llm framework", symbol="🔶")
-    AGENT = LibraryDetails(name="agent framework", symbol="⭕")
-    VIS = LibraryDetails(name="visualization", symbol="🟦")
-    GENERAL = LibraryDetails(name="general ai", symbol="🟣")
+    ML = LibraryDetails(name="Machine Learning", symbol="🟢")
+    LLM = LibraryDetails(name="LLM Framework", symbol="🔶")
+    AGENT = LibraryDetails(name="Agent Framework", symbol="⭕")
+    VIS = LibraryDetails(name="LLM Inference", symbol="🟦")
+    GENERAL = LibraryDetails(name="LLM Orchestration", symbol="🟣")
     Unknown = LibraryDetails(name="", symbol="?")
 
     def to_str(self, separator=" "):
         return f"{self.value.symbol}{separator}{self.value.name}"
 
     @staticmethod
-    def from_str(type):
-        if "machine learning" in type or "🟢" in type:
+    def from_str(type: str) -> "LibraryType":
+        if "Machine Learning" in type or "🟢" in type:
             return LibraryType.ML
-        if "llm framework" in type or "🔶" in type:
+        if "LLM Framework" in type or "🔶" in type:
             return LibraryType.LLM
-        if "agent framework" in type or "⭕" in type:
+        if "Agent Framework" in type or "⭕" in type:
             return LibraryType.AGENT
-        if "visualization" in type or "🟦" in type:
+        if "LLM Inference" in type or "🟦" in type:
             return LibraryType.VIS
-        if "general ai" in type or "🟣" in type:
+        if "LLM Orchestration" in type or "🟣" in type:
             return LibraryType.GENERAL
         return LibraryType.Unknown
 
@@ -103,11 +119,11 @@ class AssessmentStatus(Enum):
     Disputed = LibraryDetails("Disputed")
 
 # Column selection
-COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
+COLS = [getattr(auto_eval_column_attrs, field).name for field in AutoEvalColumn.model_fields if not getattr(auto_eval_column_attrs, field).hidden]
+fields = AutoEvalColumn.model_fields
 
-EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
-EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
+EVAL_COLS = [getattr(EvalQueueColumn, field).name for field in vars(EvalQueueColumn) if not field.startswith('_')]
+EVAL_TYPES = [getattr(EvalQueueColumn, field).type for field in vars(EvalQueueColumn) if not field.startswith('_')]
 
 # Task columns for benchmarking - use the display column names from the Tasks enum
 BENCHMARK_COLS = [task.value.col_name for task in Tasks]
-
src/leaderboard/read_evals.py CHANGED
@@ -1,18 +1,14 @@
 import glob
 import json
-import math
 import os
-from dataclasses import dataclass
 from datetime import datetime
-
-import numpy as np
+from pydantic import BaseModel
 
 from src.display.formatting import make_clickable_library, make_clickable_report
-from src.display.utils import AutoEvalColumn, LibraryType, Tasks, Language, AssessmentStatus
+from src.display.utils import auto_eval_column_attrs, LibraryType, Tasks, Language
 
 
-@dataclass
-class AssessmentResult:
+class AssessmentResult(BaseModel):
     """Represents one full vulnerability assessment. Built from a combination of the result and request file for a given library.
     """
     assessment_id: str  # Unique identifier
@@ -32,7 +28,7 @@ class AssessmentResult:
     report_url: str = ""  # URL to detailed assessment report
 
     @classmethod
-    def init_from_json_file(self, json_filepath):
+    def init_from_json_file(cls, json_filepath):
         """Initializes the assessment result from a JSON file"""
         with open(json_filepath) as fp:
             data = json.load(fp)
@@ -43,7 +39,7 @@ class AssessmentResult:
         org_and_repo = library_name.split("/", 1)
 
         if len(org_and_repo) == 1:
-            org = None
+            org = ""
             repo = org_and_repo[0]
             assessment_id = f"{repo}_{assessment.get('version', '')}"
         else:
@@ -71,10 +67,11 @@ class AssessmentResult:
             # Format date for display
             dt = datetime.fromisoformat(last_update)
             last_update = dt.strftime("%Y-%m-%d")
-        except:
+        except Exception as e:
+            print(e)
             pass
 
-        return self(
+        return cls(
             assessment_id=assessment_id,
             library_name=library_name,
             org=org,
@@ -90,18 +87,6 @@ class AssessmentResult:
             report_url=assessment.get("report_url", ""),
         )
 
-    def update_with_request_file(self, requests_path):
-        """Finds the relevant request file for the current library and updates info with it"""
-        request_file = get_request_file_for_library(requests_path, self.library_name, self.version)
-
-        try:
-            with open(request_file, "r") as f:
-                request = json.load(f)
-            self.library_type = LibraryType.from_str(request.get("library_type", ""))
-            self.stars = request.get("stars", 0)
-        except Exception:
-            print(f"Could not find request file for {self.library_name} version {self.version}")
-
     def to_dict(self):
         """Converts the Assessment Result to a dict compatible with our dataframe display"""
         # Calculate Trust Score as equal-weight average
@@ -123,22 +108,24 @@ class AssessmentResult:
             weight_sum += weight
 
         trust_score = risk_sum / weight_sum if weight_sum > 0 else 10
+        # Round to 1 decimal place
+        trust_score = round(trust_score, 1)
 
         data_dict = {
             "assessment_id": self.assessment_id,  # not a column, just a save name
-            AutoEvalColumn.library_type.name: self.library_type.value.name,
-            AutoEvalColumn.library_type_symbol.name: self.library_type.value.symbol,
-            AutoEvalColumn.language.name: self.language.value.name,
-            AutoEvalColumn.framework.name: self.framework,
-            AutoEvalColumn.library.name: make_clickable_library(self.library_name),
-            AutoEvalColumn.version.name: self.version,
-            AutoEvalColumn.overall_risk.name: trust_score,
-            AutoEvalColumn.license_name.name: self.license,
-            AutoEvalColumn.stars.name: self.stars,
-            AutoEvalColumn.last_update.name: self.last_update,
-            AutoEvalColumn.verified.name: self.verified,
-            AutoEvalColumn.availability.name: self.availability,
-            AutoEvalColumn.report_url.name: make_clickable_report(self.report_url),
+            auto_eval_column_attrs.library_type.name: self.library_type.value.name,
+            auto_eval_column_attrs.library_type_symbol.name: self.library_type.value.symbol,
+            auto_eval_column_attrs.language.name: self.language.value.name,
+            auto_eval_column_attrs.framework.name: self.framework,
+            auto_eval_column_attrs.library.name: make_clickable_library(self.library_name),
+            auto_eval_column_attrs.version.name: self.version,
+            auto_eval_column_attrs.overall_risk.name: trust_score,
+            auto_eval_column_attrs.license_name.name: self.license,
+            auto_eval_column_attrs.stars.name: self.stars,
+            auto_eval_column_attrs.last_update.name: self.last_update,
+            auto_eval_column_attrs.verified.name: self.verified,
+            auto_eval_column_attrs.availability.name: self.availability,
+            auto_eval_column_attrs.report_url.name: make_clickable_report(self.report_url),
         }
 
         # Add task-specific risk scores - map to display column names
@@ -147,11 +134,25 @@ class AssessmentResult:
             benchmark_key = task_enum.benchmark  # e.g., "license_validation"
             col_name = task_enum.col_name  # Use the display name, e.g., "License Risk"
             risk_score = self.results.get(benchmark_key, 10)  # Default to highest risk
-            data_dict[col_name] = risk_score
+            # Round to 1 decimal place
+            data_dict[col_name] = round(risk_score, 1)
 
         return data_dict
 
 
+    def update_with_request_file(self, assessment_filepath):
+        """Finds the relevant request file for the current library and updates info with it"""
+        try:
+            with open(assessment_filepath, "r") as f:
+                request = json.load(f)["assessment"]
+                self.library_type = LibraryType.from_str(request.get("framework", ""))
+                self.stars = request.get("github_stars", 0)
+        except Exception as e:
+            print(e)
+            print(f"Could not find request file for {self.library_name} version {self.version}")
+
+
+
 def get_request_file_for_library(requests_path, library_name, version):
     """Selects the correct request file for a given library. Only keeps runs tagged as FINISHED"""
     # Try multiple naming patterns for flexibility
@@ -203,7 +204,8 @@ def get_raw_assessment_results(results_path: str, requests_path: str) -> list[As
         # Sort the files by date if they have date info
         try:
             files.sort(key=lambda x: datetime.fromisoformat(json.loads(open(os.path.join(root, x)).read())["assessment"]["completed_time"]), reverse=True)
-        except:
+        except Exception as e:
+            print(e)
             pass
 
         for file in files:
@@ -213,7 +215,7 @@ def get_raw_assessment_results(results_path: str, requests_path: str) -> list[As
     for assessment_filepath in assessment_filepaths:
         # Creation of result
         assessment_result = AssessmentResult.init_from_json_file(assessment_filepath)
-        assessment_result.update_with_request_file(requests_path)
+        assessment_result.update_with_request_file(assessment_filepath)
 
         # Store results of same eval together
         assessment_id = assessment_result.assessment_id
src/populate.py CHANGED
@@ -2,9 +2,8 @@
 
 import pandas as pd
 
-from src.display.utils import AutoEvalColumn
+from src.display.utils import auto_eval_column_attrs
 from src.leaderboard.read_evals import get_raw_assessment_results
-from src.about import Tasks
 
 
 def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
@@ -35,8 +34,8 @@ def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_co
         all_df[col] = 10.0  # Default to highest risk
 
     # Sort by Trust Score (ascending - lower is better)
-    if AutoEvalColumn.overall_risk.name in all_df.columns:
-        all_df = all_df.sort_values(by=[AutoEvalColumn.overall_risk.name])
+    if auto_eval_column_attrs.overall_risk.name in all_df.columns:
+        all_df = all_df.sort_values(by=[auto_eval_column_attrs.overall_risk.name])
 
     return all_df
 
@@ -72,7 +71,7 @@ def get_evaluation_queue_df(eval_requests_path, eval_cols):
 
     for file_path in request_files:
         try:
-            with open(file_path, "r") as f:
+            with open(file_path, "r", encoding="utf-8") as f:
                 data = json.load(f)
 
                 # Extract relevant fields