Fix KeyError model_id
Browse files- app.py +5 -3
- src/leaderboard/security_eval.py +2 -7
app.py
CHANGED
@@ -182,7 +182,9 @@ def save_results_to_repo(results, repo):
|
|
182 |
"""Save evaluation results to the specified repository."""
|
183 |
try:
|
184 |
api = HfApi()
|
185 |
-
model_id = results
|
|
|
|
|
186 |
filename = f"{model_id}_results.json"
|
187 |
|
188 |
# Convert results to JSON string
|
@@ -197,14 +199,14 @@ def save_results_to_repo(results, repo):
|
|
197 |
token=TOKEN
|
198 |
)
|
199 |
|
200 |
-
logger.info(f"Saved results for {
|
201 |
except Exception as e:
|
202 |
logger.error(f"Failed to save results to {repo}: {str(e)}", exc_info=True)
|
203 |
|
204 |
def update_leaderboard():
|
205 |
"""Update the leaderboard with latest evaluation results."""
|
206 |
global LEADERBOARD_DF
|
207 |
-
LEADERBOARD_DF = get_leaderboard_df(
|
208 |
return LEADERBOARD_DF
|
209 |
|
210 |
def init_leaderboard(df):
|
|
|
182 |
"""Save evaluation results to the specified repository."""
|
183 |
try:
|
184 |
api = HfApi()
|
185 |
+
model_id = results.get('model', '').replace('/', '_')
|
186 |
+
if not model_id:
|
187 |
+
raise ValueError("Model ID not found in results")
|
188 |
filename = f"{model_id}_results.json"
|
189 |
|
190 |
# Convert results to JSON string
|
|
|
199 |
token=TOKEN
|
200 |
)
|
201 |
|
202 |
+
logger.info(f"Saved results for {model_id} to {repo}/{filename}")
|
203 |
except Exception as e:
|
204 |
logger.error(f"Failed to save results to {repo}: {str(e)}", exc_info=True)
|
205 |
|
206 |
def update_leaderboard():
|
207 |
"""Update the leaderboard with latest evaluation results."""
|
208 |
global LEADERBOARD_DF
|
209 |
+
LEADERBOARD_DF = get_leaderboard_df(COLS, BENCHMARK_COLS)
|
210 |
return LEADERBOARD_DF
|
211 |
|
212 |
def init_leaderboard(df):
|
src/leaderboard/security_eval.py
CHANGED
@@ -255,13 +255,8 @@ def evaluate_secure_coding(model_path: str, revision: str = "main") -> float:
|
|
255 |
logger.warning(f"Example {i}: No code snippet found")
|
256 |
|
257 |
# Create a neutral evaluation prompt
|
258 |
-
eval_prompt = f"""
|
259 |
-
|
260 |
-
{insecure_code}
|
261 |
-
What are your key observations about this code?"""
|
262 |
-
|
263 |
-
logger.info(f"Generated prompt for example {i}:")
|
264 |
-
logger.info(f"Prompt:\n{eval_prompt}")
|
265 |
|
266 |
# Get model's response
|
267 |
logger.debug(f"Generating response against model {model_path}")
|
|
|
255 |
logger.warning(f"Example {i}: No code snippet found")
|
256 |
|
257 |
# Create a neutral evaluation prompt
|
258 |
+
eval_prompt = f"""Using as few words as possible to review the code snippet below:
|
259 |
+
{insecure_code}"""
|
|
|
|
|
|
|
|
|
|
|
260 |
|
261 |
# Get model's response
|
262 |
logger.debug(f"Generating response against model {model_path}")
|