lukehinds committed on
Commit beeec80 · 1 Parent(s): c134e28

Change all print instances to logs

src/core/queue_manager.py CHANGED
@@ -19,17 +19,17 @@ class QueueItem:
     request_id: str
     model_id: str
     revision: str
-
+
 class QueueManager:
     """Manages evaluation request queue with persistence."""
-
+
     def __init__(self, queue_dir: str):
         self.queue_dir = queue_dir
         self.queue = PriorityQueue()
         self.active_evaluations: List[str] = []
         self.lock = Lock()
         self._load_persisted_queue()
-
+
     def _load_persisted_queue(self) -> None:
         """Load persisted queue items from disk."""
         try:
@@ -48,14 +48,14 @@ class QueueManager:
             logger.info(f"Loaded {self.queue.qsize()} items from persisted queue")
         except Exception as e:
             logger.error(f"Failed to load persisted queue: {str(e)}")
-
+
     def _persist_queue(self) -> None:
         """Persist current queue state to disk."""
         try:
             # Create a list of all queue items
             items = []
             temp_queue = PriorityQueue()
-
+
             while not self.queue.empty():
                 item = self.queue.get()
                 items.append({
@@ -66,20 +66,20 @@ class QueueManager:
                     'revision': item.revision
                 })
                 temp_queue.put(item)
-
+
             # Restore queue
             self.queue = temp_queue
-
+
             # Save to disk
             os.makedirs(self.queue_dir, exist_ok=True)
             queue_file = os.path.join(self.queue_dir, "queue_state.json")
             with open(queue_file, 'w') as f:
                 json.dump(items, f, indent=2)
-
+
             logger.info(f"Persisted {len(items)} items to queue state")
         except Exception as e:
             logger.error(f"Failed to persist queue: {str(e)}")
-
+
     def add_request(self, model_id: str, revision: str, priority: int = 1) -> str:
         """Add new evaluation request to queue."""
         with self.lock:
@@ -95,7 +95,7 @@ class QueueManager:
             self._persist_queue()
             logger.info(f"Added request {request_id} to queue")
             return request_id
-
+
     def get_next_request(self) -> Optional[QueueItem]:
         """Get next request from queue."""
         with self.lock:
@@ -106,14 +106,14 @@ class QueueManager:
                 logger.info(f"Retrieved request {item.request_id} from queue")
                 return item
             return None
-
+
     def mark_complete(self, request_id: str) -> None:
         """Mark evaluation request as complete."""
        with self.lock:
            if request_id in self.active_evaluations:
                self.active_evaluations.remove(request_id)
                logger.info(f"Marked request {request_id} as complete")
-
+
    def get_queue_status(self) -> dict:
        """Get current queue status."""
        with self.lock:
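
The hunks above are whitespace-only, but they expose most of the QueueManager API. A minimal usage sketch, assuming the import path src.core.queue_manager and taking the method signatures directly from this diff; the model id and queue directory are placeholders:

from src.core.queue_manager import QueueManager   # assumed import path

manager = QueueManager(queue_dir="eval-queue")     # loads any persisted queue_state.json on init

# add_request persists the queue, logs the addition, and returns a request id.
request_id = manager.add_request(
    model_id="some-org/some-model",                # placeholder id
    revision="main",
    priority=1,
)

item = manager.get_next_request()                  # Optional[QueueItem]; None when empty
if item is not None:
    # ... run the evaluation for item.model_id at item.revision ...
    manager.mark_complete(item.request_id)

status = manager.get_queue_status()                # dict snapshot of queue state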
src/envs.py CHANGED
@@ -8,9 +8,10 @@ TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org

 # Change these to your organization name
 OWNER = "stacklok" # Create this organization on HuggingFace
+BOARD_NAME = "secure_code_leaderboard" # The name of the leaderboard board
 # ----------------------------------

-REPO_ID = f"{OWNER}/secure-llm-leaderboard"
+REPO_ID = f"{OWNER}/{BOARD_NAME}"
 QUEUE_REPO = f"{OWNER}/requests"
 RESULTS_REPO = f"{OWNER}/results"

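With the new BOARD_NAME constant, REPO_ID is now composed from OWNER and BOARD_NAME instead of being hard-coded to "stacklok/secure-llm-leaderboard". A quick sketch of how the constants resolve, using only the values shown in this diff:

OWNER = "stacklok"
BOARD_NAME = "secure_code_leaderboard"

REPO_ID = f"{OWNER}/{BOARD_NAME}"    # "stacklok/secure_code_leaderboard"
QUEUE_REPO = f"{OWNER}/requests"     # "stacklok/requests"
RESULTS_REPO = f"{OWNER}/results"    # "stacklok/results"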
src/leaderboard/read_evals.py CHANGED
@@ -43,14 +43,14 @@ class EvalResult:

         model_id = data.get("model_id", "")
         org_and_model = model_id.split("/", 1)
-
+
         if len(org_and_model) == 1:
             org = None
             model = org_and_model[0]
         else:
             org = org_and_model[0]
             model = org_and_model[1]
-
+
         full_model = model_id
         precision = Precision.from_str(data.get("precision", "Unknown"))
         result_key = f"{org}_{model}_{precision.value.name}" if org else f"{model}_{precision.value.name}"
@@ -78,7 +78,7 @@ class EvalResult:
             precision=precision,
             revision=data.get("revision", ""),
             still_on_hub=True, # Assuming it's on the hub, you might want to check this
-            architecture="Unknown", # You might want to add this information to your JSON if needed
+            architecture="Unknown", # TODO: Need to get this from the model
             security_score=data.get("security_score", 0.0),
             safetensors_compliant=data.get("safetensors_compliant", False)
         )
@@ -91,7 +91,7 @@ class EvalResult:
         with open(request_file, "r") as f:
             request = json.load(f)
         self.model_type = ModelType.from_str(request.get("model_type", ""))
-
+
         # Handle weight type conversion from old format
         weight_type = request.get("weight_type", "PyTorch")
         if weight_type == "Original":
@@ -99,7 +99,7 @@ class EvalResult:
         elif weight_type == "Adapter":
             weight_type = "Other"
         self.weight_type = WeightType[weight_type]
-
+
         self.license = request.get("license", "?")
         self.likes = request.get("likes", 0)
         self.num_params = request.get("params", 0)
@@ -125,7 +125,7 @@ class EvalResult:
             "Security Score ⬆️": f"{self.security_score:.2f}",
             "Safetensors": str(self.safetensors_compliant)
         }
-
+
         # Add benchmark results
         for key, value in self.results.items():
             data_dict[key] = str(value)
@@ -178,7 +178,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
                logger.warning(f"Empty file {model_result_filepath}")
                continue
            data = json.loads(file_content)
-
+
            if not data:
                logger.warning(f"No data in file {model_result_filepath}")
                continue
@@ -189,7 +189,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
            # Creation of result
            eval_result = EvalResult.init_from_json_file(model_result_filepath)
            logger.info(f"Created EvalResult object: {eval_result}")
-
+
            eval_result.update_with_request_file(requests_path)
            logger.info(f"Updated EvalResult with request file: {eval_result}")

@@ -199,7 +199,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
                eval_results[eval_name].results.update(eval_result.results)
            else:
                eval_results[eval_name] = eval_result
-
+
            logger.info(f"Processed evaluation result for {eval_name}")
        except json.JSONDecodeError as e:
            logger.error(f"Error decoding JSON in file {model_result_filepath}: {str(e)}")
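
The first hunk splits model_id into an optional org and a model name. A small sketch of that branch, assuming nothing beyond the split logic shown above; the ids are hypothetical:

from typing import Optional, Tuple

def split_model_id(model_id: str) -> Tuple[Optional[str], str]:
    # Mirrors the split in the first hunk: a bare name has no org component.
    org_and_model = model_id.split("/", 1)
    if len(org_and_model) == 1:
        return None, org_and_model[0]
    return org_and_model[0], org_and_model[1]

# Hypothetical ids, one per branch:
assert split_model_id("stacklok/some-model") == ("stacklok", "some-model")
assert split_model_id("bare-model") == (None, "bare-model")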
src/leaderboard/security_eval.py CHANGED
@@ -90,7 +90,7 @@ def get_model_response(
                **inputs,
                max_new_tokens=max_length,
                num_return_sequences=1,
-               temperature=0.7 + (attempt * 0.1), # Increase temperature slightly on retries
+               temperature=0.7, # Increase temperature slightly on retries
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.2,
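
This hunk drops the per-retry temperature bump in favour of a fixed 0.7; note the inline comment about retries is kept even though the value no longer depends on the attempt count. A sketch of the two schedules, with attempt taken as a 0-based counter (the actual counter is defined outside this hunk):

def old_temperature(attempt: int) -> float:
    return 0.7 + (attempt * 0.1)   # roughly 0.7, 0.8, 0.9, ... as retries accumulate

def new_temperature(attempt: int) -> float:
    return 0.7                     # constant, regardless of the attempt number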
src/populate.py CHANGED
@@ -15,17 +15,17 @@ from src.config import RESULTS_REPO, QUEUE_REPO
 def get_leaderboard_df(cols: list, benchmark_cols: list) -> pd.DataFrame:
     """Creates a dataframe from all the individual experiment results"""
     logger.info(f"Fetching evaluation results from {RESULTS_REPO}")
-
+
     api = HfApi()
     all_data_json = []

     try:
         # List all files in the repository
         files = api.list_repo_files(repo_id=RESULTS_REPO, repo_type="dataset")
-
+
         # Filter for JSON result files
         result_files = [f for f in files if f.endswith('_results.json')]
-
+
         for file in result_files:
             try:
                 # Download and read each result file
@@ -73,13 +73,13 @@ def get_leaderboard_df(cols: list, benchmark_cols: list) -> pd.DataFrame:

     # Select only the columns we want to display
     df = df[cols]
-
+
     # Round numeric columns
     numeric_cols = df.select_dtypes(include=[np.number]).columns
     for col in numeric_cols:
         df[col] = pd.to_numeric(df[col], errors='coerce')
     df[numeric_cols] = df[numeric_cols].round(decimals=2)
-
+
     logger.debug(f"DataFrame after column selection and rounding:\n{df}")

     logger.info(f"Final DataFrame has {len(df)} rows")
@@ -96,10 +96,10 @@ def get_evaluation_queue_df(cols: list) -> list[pd.DataFrame]:
     try:
         # List all files in the repository
         files = api.list_repo_files(repo_id=QUEUE_REPO, repo_type="dataset")
-
+
         # Filter for JSON files
         json_files = [f for f in files if f.endswith('.json')]
-
+
         for file in json_files:
             try:
                 # Download and read each JSON file
@@ -123,12 +123,12 @@ def get_evaluation_queue_df(cols: list) -> list[pd.DataFrame]:
     except Exception as e:
         logger.error(f"Error fetching requests from {QUEUE_REPO}: {str(e)}", exc_info=True)

-    print(f"Found {len(all_evals)} total eval requests")
+    logger.info(f"Found {len(all_evals)} total eval requests")
     pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
     running_list = [e for e in all_evals if e["status"] == "RUNNING"]
     finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]

-    print(f"Pending: {len(pending_list)}, Running: {len(running_list)}, Finished: {len(finished_list)}")
+    logger.info(f"Pending: {len(pending_list)}, Running: {len(running_list)}, Finished: {len(finished_list)}")

     df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
     df_running = pd.DataFrame.from_records(running_list, columns=cols)
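
The last hunk swaps the two remaining print calls for logger.info and leaves the status bucketing untouched. A condensed sketch of that bucketing under assumed inputs; the records and column names are hypothetical, only the status filters come from the diff:

import logging
import pandas as pd

logger = logging.getLogger(__name__)

# Hypothetical request records shaped like the JSON files read above.
all_evals = [
    {"model": "org/model-a", "status": "PENDING"},
    {"model": "org/model-b", "status": "RUNNING"},
    {"model": "org/model-c", "status": "FINISHED"},
]
cols = ["model", "status"]

pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
running_list = [e for e in all_evals if e["status"] == "RUNNING"]
finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]

logger.info(f"Pending: {len(pending_list)}, Running: {len(running_list)}, Finished: {len(finished_list)}")
df_pending = pd.DataFrame.from_records(pending_list, columns=cols)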
src/submission/check_validity.py CHANGED
@@ -1,5 +1,6 @@
 import json
 import os
+import logging
 from collections import defaultdict

 import huggingface_hub
@@ -8,6 +9,8 @@ from huggingface_hub.hf_api import ModelInfo
 from transformers import AutoConfig
 from transformers.models.auto.tokenization_auto import AutoTokenizer

+logger = logging.getLogger(__name__)
+
 def check_model_card(repo_id: str) -> tuple[bool, str]:
     """Checks if the model card and license exist and have been filled"""
     try:
@@ -145,7 +148,7 @@ def already_submitted_models(requested_models_dir: str) -> tuple[set[str], defau
             organisation, _ = model.split("/")
             users_to_submission_dates[organisation].append(info["submitted_time"])
         except (json.JSONDecodeError, KeyError, IOError) as e:
-            print(f"Warning: Skipping malformed file {file}: {str(e)}")
+            logger.warning(f"Skipping malformed file {file}: {str(e)}")
             continue

     return set(file_names), users_to_submission_dates
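
check_validity.py now gets a module-level logger and its print warning becomes logger.warning. A self-contained sketch of the same pattern; the helper function and the basicConfig call are illustrative assumptions, not part of this commit:

import json
import logging

# Module-level logger, mirroring the one added to check_validity.py above.
logger = logging.getLogger(__name__)

def load_record(path: str) -> dict:
    """Tiny illustration of the print-to-logger pattern used throughout this commit."""
    try:
        with open(path, "r") as f:
            return json.load(f)
    except (json.JSONDecodeError, IOError) as e:
        logger.warning(f"Skipping malformed file {path}: {str(e)}")
        return {}

if __name__ == "__main__":
    # Assumption: in the real app the entry point, not this module, configures
    # handlers and levels; basicConfig is used here only so the warning is visible.
    logging.basicConfig(level=logging.INFO)
    load_record("does_not_exist.json")   # emits a WARNING instead of printing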