Change all print statements to logging calls

Files changed:
- src/core/queue_manager.py +12 -12
- src/envs.py +2 -1
- src/leaderboard/read_evals.py +9 -9
- src/leaderboard/security_eval.py +1 -1
- src/populate.py +9 -9
- src/submission/check_validity.py +4 -1
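
The diffs below follow the standard-library logging pattern: a module-level logger obtained via logging.getLogger, with logger.info/warning/error calls where output used to be printed. A minimal sketch of that pattern for context; the function name here is hypothetical, only the logger setup mirrors the code below:

import logging

logger = logging.getLogger(__name__)  # module-level logger, as added to check_validity.py below

def process_items(items: list) -> None:
    # Status output that previously went through print() now goes through the logger.
    logger.info(f"Processing {len(items)} items")
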
src/core/queue_manager.py
CHANGED
@@ -19,17 +19,17 @@ class QueueItem:
     request_id: str
     model_id: str
     revision: str
-
+
 class QueueManager:
     """Manages evaluation request queue with persistence."""
-
+
     def __init__(self, queue_dir: str):
         self.queue_dir = queue_dir
         self.queue = PriorityQueue()
         self.active_evaluations: List[str] = []
         self.lock = Lock()
         self._load_persisted_queue()
-
+
     def _load_persisted_queue(self) -> None:
         """Load persisted queue items from disk."""
         try:
@@ -48,14 +48,14 @@ class QueueManager:
             logger.info(f"Loaded {self.queue.qsize()} items from persisted queue")
         except Exception as e:
             logger.error(f"Failed to load persisted queue: {str(e)}")
-
+
     def _persist_queue(self) -> None:
         """Persist current queue state to disk."""
         try:
             # Create a list of all queue items
             items = []
             temp_queue = PriorityQueue()
-
+
             while not self.queue.empty():
                 item = self.queue.get()
                 items.append({
@@ -66,20 +66,20 @@ class QueueManager:
                     'revision': item.revision
                 })
                 temp_queue.put(item)
-
+
             # Restore queue
             self.queue = temp_queue
-
+
             # Save to disk
             os.makedirs(self.queue_dir, exist_ok=True)
             queue_file = os.path.join(self.queue_dir, "queue_state.json")
             with open(queue_file, 'w') as f:
                 json.dump(items, f, indent=2)
-
+
             logger.info(f"Persisted {len(items)} items to queue state")
         except Exception as e:
             logger.error(f"Failed to persist queue: {str(e)}")
-
+
     def add_request(self, model_id: str, revision: str, priority: int = 1) -> str:
         """Add new evaluation request to queue."""
         with self.lock:
@@ -95,7 +95,7 @@ class QueueManager:
             self._persist_queue()
             logger.info(f"Added request {request_id} to queue")
             return request_id
-
+
     def get_next_request(self) -> Optional[QueueItem]:
         """Get next request from queue."""
         with self.lock:
@@ -106,14 +106,14 @@ class QueueManager:
                 logger.info(f"Retrieved request {item.request_id} from queue")
                 return item
             return None
-
+
     def mark_complete(self, request_id: str) -> None:
         """Mark evaluation request as complete."""
         with self.lock:
             if request_id in self.active_evaluations:
                 self.active_evaluations.remove(request_id)
                 logger.info(f"Marked request {request_id} as complete")
-
+
     def get_queue_status(self) -> dict:
         """Get current queue status."""
         with self.lock:

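For orientation, a hedged usage sketch of the QueueManager API visible in this diff; the priority semantics and the exact contents of queue_state.json are assumptions, not verified against the full file:

from src.core.queue_manager import QueueManager

manager = QueueManager(queue_dir="eval-queue")  # _load_persisted_queue() runs here
request_id = manager.add_request("some-org/some-model", revision="main", priority=1)

item = manager.get_next_request()  # returns a QueueItem or None
if item is not None:
    # ... run the evaluation ...
    manager.mark_complete(item.request_id)

status = manager.get_queue_status()  # dict snapshot of the queue
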
src/envs.py
CHANGED
@@ -8,9 +8,10 @@ TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
 
 # Change these to your organization name
 OWNER = "stacklok" # Create this organization on HuggingFace
+BOARD_NAME = "secure_code_leaderboard" # The name of the leaderboard board
 # ----------------------------------
 
-REPO_ID = f"{OWNER}/
+REPO_ID = f"{OWNER}/{BOARD_NAME}"
 QUEUE_REPO = f"{OWNER}/requests"
 RESULTS_REPO = f"{OWNER}/results"
 

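For clarity, the derived repo IDs after this change evaluate to the following (values taken directly from the constants above):

OWNER = "stacklok"
BOARD_NAME = "secure_code_leaderboard"

REPO_ID = f"{OWNER}/{BOARD_NAME}"   # -> "stacklok/secure_code_leaderboard"
QUEUE_REPO = f"{OWNER}/requests"    # -> "stacklok/requests"
RESULTS_REPO = f"{OWNER}/results"   # -> "stacklok/results"
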
src/leaderboard/read_evals.py
CHANGED
@@ -43,14 +43,14 @@ class EvalResult:
 
         model_id = data.get("model_id", "")
         org_and_model = model_id.split("/", 1)
-
+
         if len(org_and_model) == 1:
             org = None
             model = org_and_model[0]
         else:
             org = org_and_model[0]
             model = org_and_model[1]
-
+
         full_model = model_id
         precision = Precision.from_str(data.get("precision", "Unknown"))
         result_key = f"{org}_{model}_{precision.value.name}" if org else f"{model}_{precision.value.name}"
@@ -78,7 +78,7 @@ class EvalResult:
             precision=precision,
             revision=data.get("revision", ""),
             still_on_hub=True, # Assuming it's on the hub, you might want to check this
-            architecture="Unknown", #
+            architecture="Unknown", # TODO: Need to get this from the model
             security_score=data.get("security_score", 0.0),
             safetensors_compliant=data.get("safetensors_compliant", False)
         )
@@ -91,7 +91,7 @@ class EvalResult:
         with open(request_file, "r") as f:
             request = json.load(f)
         self.model_type = ModelType.from_str(request.get("model_type", ""))
-
+
         # Handle weight type conversion from old format
         weight_type = request.get("weight_type", "PyTorch")
         if weight_type == "Original":
@@ -99,7 +99,7 @@ class EvalResult:
         elif weight_type == "Adapter":
             weight_type = "Other"
         self.weight_type = WeightType[weight_type]
-
+
         self.license = request.get("license", "?")
         self.likes = request.get("likes", 0)
         self.num_params = request.get("params", 0)
@@ -125,7 +125,7 @@ class EvalResult:
             "Security Score ⬆️": f"{self.security_score:.2f}",
             "Safetensors": str(self.safetensors_compliant)
         }
-
+
         # Add benchmark results
         for key, value in self.results.items():
             data_dict[key] = str(value)
@@ -178,7 +178,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
                 logger.warning(f"Empty file {model_result_filepath}")
                 continue
             data = json.loads(file_content)
-
+
             if not data:
                 logger.warning(f"No data in file {model_result_filepath}")
                 continue
@@ -189,7 +189,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
             # Creation of result
             eval_result = EvalResult.init_from_json_file(model_result_filepath)
             logger.info(f"Created EvalResult object: {eval_result}")
-
+
             eval_result.update_with_request_file(requests_path)
             logger.info(f"Updated EvalResult with request file: {eval_result}")
 
@@ -199,7 +199,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
                 eval_results[eval_name].results.update(eval_result.results)
             else:
                 eval_results[eval_name] = eval_result
-
+
             logger.info(f"Processed evaluation result for {eval_name}")
         except json.JSONDecodeError as e:
             logger.error(f"Error decoding JSON in file {model_result_filepath}: {str(e)}")

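A small worked example of the org/model split and result key logic in the first hunk; the model IDs here are illustrative, not taken from the repository:

def split_model_id(model_id: str) -> tuple[str | None, str]:
    # Same branching as in the EvalResult hunk above.
    org_and_model = model_id.split("/", 1)
    if len(org_and_model) == 1:
        return None, org_and_model[0]
    return org_and_model[0], org_and_model[1]

assert split_model_id("example-org/example-model") == ("example-org", "example-model")
assert split_model_id("example-model") == (None, "example-model")
# result_key then becomes "example-org_example-model_<precision>" or "example-model_<precision>".
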
src/leaderboard/security_eval.py
CHANGED
@@ -90,7 +90,7 @@ def get_model_response(
         **inputs,
         max_new_tokens=max_length,
         num_return_sequences=1,
-        temperature=0.7,
+        temperature=0.7, # Increase temperature slightly on retries
         do_sample=True,
         pad_token_id=tokenizer.eos_token_id,
         repetition_penalty=1.2,

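The added comment mentions increasing temperature on retries, while the value shown is a constant 0.7. One hypothetical way to express that intent is sketched below; the function name and retry policy are assumptions, not the repository's code:

def retry_temperature(attempt: int, base: float = 0.7, step: float = 0.05, cap: float = 1.0) -> float:
    """Illustrative only: bump the sampling temperature a little on each retry attempt."""
    return min(base + step * attempt, cap)

# e.g. attempt 0 -> 0.7, attempt 1 -> 0.75, attempt 2 -> 0.8
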
src/populate.py
CHANGED
@@ -15,17 +15,17 @@ from src.config import RESULTS_REPO, QUEUE_REPO
 def get_leaderboard_df(cols: list, benchmark_cols: list) -> pd.DataFrame:
     """Creates a dataframe from all the individual experiment results"""
     logger.info(f"Fetching evaluation results from {RESULTS_REPO}")
-
+
     api = HfApi()
     all_data_json = []
 
     try:
         # List all files in the repository
         files = api.list_repo_files(repo_id=RESULTS_REPO, repo_type="dataset")
-
+
         # Filter for JSON result files
         result_files = [f for f in files if f.endswith('_results.json')]
-
+
         for file in result_files:
             try:
                 # Download and read each result file
@@ -73,13 +73,13 @@ def get_leaderboard_df(cols: list, benchmark_cols: list) -> pd.DataFrame:
 
         # Select only the columns we want to display
         df = df[cols]
-
+
         # Round numeric columns
         numeric_cols = df.select_dtypes(include=[np.number]).columns
        for col in numeric_cols:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        df[numeric_cols] = df[numeric_cols].round(decimals=2)
-
+
        logger.debug(f"DataFrame after column selection and rounding:\n{df}")
 
        logger.info(f"Final DataFrame has {len(df)} rows")
@@ -96,10 +96,10 @@ def get_evaluation_queue_df(cols: list) -> list[pd.DataFrame]:
     try:
         # List all files in the repository
         files = api.list_repo_files(repo_id=QUEUE_REPO, repo_type="dataset")
-
+
         # Filter for JSON files
         json_files = [f for f in files if f.endswith('.json')]
-
+
         for file in json_files:
             try:
                 # Download and read each JSON file
@@ -123,12 +123,12 @@ def get_evaluation_queue_df(cols: list) -> list[pd.DataFrame]:
     except Exception as e:
         logger.error(f"Error fetching requests from {QUEUE_REPO}: {str(e)}", exc_info=True)
 
-
+    logger.info(f"Found {len(all_evals)} total eval requests")
     pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
     running_list = [e for e in all_evals if e["status"] == "RUNNING"]
     finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
 
-
+    logger.info(f"Pending: {len(pending_list)}, Running: {len(running_list)}, Finished: {len(finished_list)}")
 
     df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
     df_running = pd.DataFrame.from_records(running_list, columns=cols)

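A toy illustration of the status partition and the two new log lines; the statuses below are made up, only the filtering expressions mirror the code above:

all_evals = [
    {"status": "PENDING"},
    {"status": "RUNNING"},
    {"status": "FINISHED_SUCCESS"},
    {"status": "PENDING_NEW_EVAL"},
]
pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
running_list = [e for e in all_evals if e["status"] == "RUNNING"]
finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
# Found 4 total eval requests; Pending: 1, Running: 1, Finished: 2
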
src/submission/check_validity.py
CHANGED
@@ -1,5 +1,6 @@
 import json
 import os
+import logging
 from collections import defaultdict
 
 import huggingface_hub
@@ -8,6 +9,8 @@ from huggingface_hub.hf_api import ModelInfo
 from transformers import AutoConfig
 from transformers.models.auto.tokenization_auto import AutoTokenizer
 
+logger = logging.getLogger(__name__)
+
 def check_model_card(repo_id: str) -> tuple[bool, str]:
     """Checks if the model card and license exist and have been filled"""
     try:
@@ -145,7 +148,7 @@ def already_submitted_models(requested_models_dir: str) -> tuple[set[str], defau
             organisation, _ = model.split("/")
             users_to_submission_dates[organisation].append(info["submitted_time"])
         except (json.JSONDecodeError, KeyError, IOError) as e:
-
+            logger.warning(f"Skipping malformed file {file}: {str(e)}")
             continue
 
     return set(file_names), users_to_submission_dates

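For the new logger.warning call (and the other logger calls in this commit) to be visible, some entry point has to configure handlers. That configuration is not part of this diff; a minimal sketch, assuming plain standard-library logging:

import logging

# Minimal root-logger setup (illustrative; the application's actual logging
# configuration is not shown in this commit).
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(name)s %(levelname)s %(message)s",
)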