"""Tests for core evaluation functionality."""
import pytest
import os
import json
from datetime import datetime
from src.core.evaluation import EvaluationManager, EvaluationRequest


@pytest.fixture
def evaluation_manager(tmp_path):
    """Create evaluation manager with temporary directories."""
    results_dir = tmp_path / "results"
    backup_dir = tmp_path / "backups"
    return EvaluationManager(str(results_dir), str(backup_dir))


def test_evaluation_manager_init(evaluation_manager):
    """Test evaluation manager initialization."""
    assert os.path.exists(evaluation_manager.results_dir)
    assert os.path.exists(evaluation_manager.backup_dir)


def test_backup_results(evaluation_manager):
    """Test backup creation."""
    # Create test results
    eval_id = "test_model_main"
    result_path = os.path.join(evaluation_manager.results_dir, f"{eval_id}.json")
    test_results = {"test": "data"}
    os.makedirs(os.path.dirname(result_path), exist_ok=True)
    with open(result_path, 'w') as f:
        json.dump(test_results, f)

    # Create backup
    evaluation_manager.backup_results(eval_id)

    # Check backup exists
    backup_files = os.listdir(evaluation_manager.backup_dir)
    assert len(backup_files) == 1
    assert backup_files[0].startswith(eval_id)


def test_run_evaluation(evaluation_manager):
    """Test full evaluation run."""
    request = EvaluationRequest(
        model_id="hf-internal-testing/tiny-random-gpt2",
        revision="main",
        precision="float16",
        weight_type="Safetensors",
        submitted_time=datetime.now()
    )

    results = evaluation_manager.run_evaluation(request)

    assert results["model_id"] == request.model_id
    assert results["revision"] == request.revision
    assert "security_score" in results
    assert "safetensors_compliant" in results


def test_evaluation_error_handling(evaluation_manager):
    """Test error handling during evaluation."""
    request = EvaluationRequest(
        model_id="invalid/model",
        revision="main",
        precision="float16",
        weight_type="Safetensors",
        submitted_time=datetime.now()
    )

    with pytest.raises(Exception):
        evaluation_manager.run_evaluation(request)


def test_concurrent_evaluations(evaluation_manager):
    """Test handling of concurrent evaluations."""
    import threading

    def run_eval(model_id):
        request = EvaluationRequest(
            model_id=model_id,
            revision="main",
            precision="float16",
            weight_type="Safetensors",
            submitted_time=datetime.now()
        )
        try:
            evaluation_manager.run_evaluation(request)
        except Exception:
            pass
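    # Exceptions inside run_eval are swallowed so a failing dummy model does not
    # kill its thread; the test only checks directory integrity afterwards.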
    # Start multiple evaluation threads
    threads = []
    for i in range(3):
        thread = threading.Thread(
            target=run_eval,
            args=(f"model_{i}",)
        )
        threads.append(thread)
        thread.start()

    # Wait for all threads to complete
    for thread in threads:
        thread.join()

    # Check results directory integrity
    assert os.path.exists(evaluation_manager.results_dir)
    assert os.path.exists(evaluation_manager.backup_dir)