"""Tests for core evaluation functionality."""

import json
import os
from datetime import datetime

import pytest

from src.core.evaluation import EvaluationManager, EvaluationRequest


@pytest.fixture
def evaluation_manager(tmp_path):
    """Create evaluation manager with temporary directories."""
    results_dir = tmp_path / "results"
    backup_dir = tmp_path / "backups"
    return EvaluationManager(str(results_dir), str(backup_dir))


def test_evaluation_manager_init(evaluation_manager):
    """Test evaluation manager initialization."""
    assert os.path.exists(evaluation_manager.results_dir)
    assert os.path.exists(evaluation_manager.backup_dir)


def test_backup_results(evaluation_manager):
    """Test backup creation."""
    eval_id = "test_model_main"
    result_path = os.path.join(evaluation_manager.results_dir, f"{eval_id}.json")
    test_results = {"test": "data"}

    # Write a result file for the backup to pick up.
    os.makedirs(os.path.dirname(result_path), exist_ok=True)
    with open(result_path, 'w') as f:
        json.dump(test_results, f)

    evaluation_manager.backup_results(eval_id)

    # Exactly one backup file should exist, named after the evaluation id.
    backup_files = os.listdir(evaluation_manager.backup_dir)
    assert len(backup_files) == 1
    assert backup_files[0].startswith(eval_id)


def test_run_evaluation(evaluation_manager):
    """Test full evaluation run."""
    request = EvaluationRequest(
        model_id="hf-internal-testing/tiny-random-gpt2",
        revision="main",
        precision="float16",
        weight_type="Safetensors",
        submitted_time=datetime.now(),
    )

    results = evaluation_manager.run_evaluation(request)

    assert results["model_id"] == request.model_id
    assert results["revision"] == request.revision
    assert "security_score" in results
    assert "safetensors_compliant" in results


def test_evaluation_error_handling(evaluation_manager):
    """Test error handling during evaluation."""
    request = EvaluationRequest(
        model_id="invalid/model",
        revision="main",
        precision="float16",
        weight_type="Safetensors",
        submitted_time=datetime.now(),
    )

    with pytest.raises(Exception):
        evaluation_manager.run_evaluation(request)


def test_concurrent_evaluations(evaluation_manager):
    """Test handling of concurrent evaluations."""
    import threading

    def run_eval(model_id):
        request = EvaluationRequest(
            model_id=model_id,
            revision="main",
            precision="float16",
            weight_type="Safetensors",
            submitted_time=datetime.now(),
        )
        try:
            evaluation_manager.run_evaluation(request)
        except Exception:
            # Individual evaluations may fail for these dummy model ids; this
            # test only checks that concurrent runs leave the directories intact.
            pass

    threads = []
    for i in range(3):
        thread = threading.Thread(target=run_eval, args=(f"model_{i}",))
        threads.append(thread)
        thread.start()

    for thread in threads:
        thread.join()

    assert os.path.exists(evaluation_manager.results_dir)
    assert os.path.exists(evaluation_manager.backup_dir)