wu981526092 committed
Commit 3211e96 · 1 Parent(s): c8f9133
src/about.py CHANGED
@@ -37,7 +37,7 @@ This leaderboard provides continuous vulnerability assessment for open-source AI
 - **Dependency Management**: Vulnerability inheritance and supply chain security
 - **Regulatory Compliance**: Compliance readiness for various frameworks
 
-Lower scores indicate fewer vulnerabilities and lower risk. The overall risk score is a weighted average of all domains, with security given higher priority.
+Lower scores indicate fewer vulnerabilities and lower risk. The Trust Score is an equal-weighted average of all five domains, providing a balanced assessment of overall library trustworthiness.
 """
 
 # Which evaluations are you running? how can people reproduce what you have?
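
To make the new scoring rule concrete, here is a minimal sketch of the equal-weighted average described in the updated text. The five domain names and the lower-is-better 0-10 risk scale come from the diff; the example scores are invented for illustration.

# Hypothetical domain risk scores on the 0-10 scale (lower is better).
scores = {
    "License Validation": 2.0,
    "Security Assessment": 4.5,
    "Maintenance Health": 1.5,
    "Dependency Management": 3.0,
    "Regulatory Compliance": 2.5,
}

# Equal weighting: every domain contributes 1/5 of the Trust Score.
trust_score = sum(scores.values()) / len(scores)  # (2.0+4.5+1.5+3.0+2.5)/5
print(round(trust_score, 2))  # 2.7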
src/display/utils.py CHANGED
@@ -26,7 +26,7 @@ auto_eval_column_dict = []
 auto_eval_column_dict.append(["library_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["library", ColumnContent, ColumnContent("Library", "markdown", True, never_hidden=True)])
 #Scores
-auto_eval_column_dict.append(["overall_risk", ColumnContent, ColumnContent("Overall Risk ⬇️", "number", True)])
+auto_eval_column_dict.append(["overall_risk", ColumnContent, ColumnContent("Trust Score ⬇️", "number", True)])
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 # Library information
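
For readers unfamiliar with the leaderboard template: ColumnContent is the small descriptor consumed by the appends above. Its definition is not part of this commit; the sketch below is inferred from the call sites (positional name, display type, a displayed-by-default flag, plus the never_hidden keyword) and from the standard Hugging Face leaderboard template, so treat the field names not visible in the calls as assumptions.

from dataclasses import dataclass

# Sketch of the column descriptor implied by the call sites above
# (field names beyond the call sites are assumptions from the template).
@dataclass
class ColumnContent:
    name: str                    # header shown in the UI, e.g. "Trust Score ⬇️"
    type: str                    # display type: "str", "number", "markdown", ...
    displayed_by_default: bool   # third positional argument in the appends above
    hidden: bool = False
    never_hidden: bool = False   # e.g. the "T" and "Library" columns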
src/leaderboard/read_evals.py CHANGED
@@ -104,16 +104,16 @@ class AssessmentResult:
 
     def to_dict(self):
         """Converts the Assessment Result to a dict compatible with our dataframe display"""
-        # Calculate overall risk as weighted average
+        # Calculate Trust Score as equal-weight average
         weights = {
             "license_validation": 0.2,
-            "security_assessment": 0.3,
+            "security_assessment": 0.2,
             "maintenance_health": 0.2,
             "dependency_management": 0.2,
-            "regulatory_compliance": 0.1
+            "regulatory_compliance": 0.2
         }
 
-        # Calculate overall risk - if domain is missing, use highest risk score (10)
+        # Calculate Trust Score - if domain is missing, use highest risk score (10)
         risk_sum = 0
         weight_sum = 0
 
@@ -122,7 +122,7 @@ class AssessmentResult:
             risk_sum += score * weight
             weight_sum += weight
 
-        overall_risk = risk_sum / weight_sum if weight_sum > 0 else 10
+        trust_score = risk_sum / weight_sum if weight_sum > 0 else 10
 
         data_dict = {
             "assessment_id": self.assessment_id,  # not a column, just a save name
@@ -132,7 +132,7 @@ class AssessmentResult:
             AutoEvalColumn.framework.name: self.framework,
             AutoEvalColumn.library.name: make_clickable_library(self.library_name),
             AutoEvalColumn.version.name: self.version,
-            AutoEvalColumn.overall_risk.name: overall_risk,
+            AutoEvalColumn.overall_risk.name: trust_score,
             AutoEvalColumn.license_name.name: self.license,
             AutoEvalColumn.stars.name: self.stars,
             AutoEvalColumn.last_update.name: self.last_update,
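
The hunks above show only fragments of the scoring loop, so the sketch below reassembles the post-commit computation as a standalone function. The weights and the fallback value of 10 come from the diff; the loop body over a possibly incomplete results dict is an assumption, since the commit does not show how missing domains enter the loop.

# Sketch of the Trust Score computation after this commit, assuming domain
# scores arrive as a dict of domain -> risk score on the 0-10 scale.
DOMAIN_WEIGHTS = {
    "license_validation": 0.2,
    "security_assessment": 0.2,
    "maintenance_health": 0.2,
    "dependency_management": 0.2,
    "regulatory_compliance": 0.2,
}

def compute_trust_score(results: dict) -> float:
    risk_sum = 0.0
    weight_sum = 0.0
    for domain, weight in DOMAIN_WEIGHTS.items():
        # Per the diff's comment, a missing domain counts as the highest
        # risk score (10) rather than being skipped. (Assumed loop body.)
        score = results.get(domain, 10)
        risk_sum += score * weight
        weight_sum += weight
    return risk_sum / weight_sum if weight_sum > 0 else 10

# With equal weights, the weighted average reduces to a plain mean of the
# five domain scores, missing domains counted as 10:
print(compute_trust_score({"license_validation": 2, "security_assessment": 4}))  # 7.2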
src/populate.py CHANGED
@@ -34,7 +34,7 @@ def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_co
         print(f"Warning: Column '{col}' missing, adding empty column")
         all_df[col] = 10.0  # Default to highest risk
 
-    # Sort by overall risk score (ascending - lower is better)
+    # Sort by Trust Score (ascending - lower is better)
     if AutoEvalColumn.overall_risk.name in all_df.columns:
         all_df = all_df.sort_values(by=[AutoEvalColumn.overall_risk.name])
 
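
Because lower Trust Scores mean lower risk, the ascending sort above puts the most trustworthy libraries first. A toy illustration of the same sort_values call, with invented frame contents and the display column name from utils.py:

import pandas as pd

# Toy leaderboard frame; "Trust Score ⬇️" mirrors the display column name.
df = pd.DataFrame({
    "Library": ["libA", "libB", "libC"],
    "Trust Score ⬇️": [4.2, 1.3, 2.8],
})

# Ascending sort ranks the lowest-risk library first.
df = df.sort_values(by=["Trust Score ⬇️"])
print(df["Library"].tolist())  # ['libB', 'libC', 'libA']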