"""Tests for core evaluation functionality.""" import pytest import os import json from datetime import datetime from src.core.evaluation import EvaluationManager, EvaluationRequest @pytest.fixture def evaluation_manager(tmp_path): """Create evaluation manager with temporary directories.""" results_dir = tmp_path / "results" backup_dir = tmp_path / "backups" return EvaluationManager(str(results_dir), str(backup_dir)) def test_evaluation_manager_init(evaluation_manager): """Test evaluation manager initialization.""" assert os.path.exists(evaluation_manager.results_dir) assert os.path.exists(evaluation_manager.backup_dir) def test_backup_results(evaluation_manager): """Test backup creation.""" # Create test results eval_id = "test_model_main" result_path = os.path.join(evaluation_manager.results_dir, f"{eval_id}.json") test_results = {"test": "data"} os.makedirs(os.path.dirname(result_path), exist_ok=True) with open(result_path, 'w') as f: json.dump(test_results, f) # Create backup evaluation_manager.backup_results(eval_id) # Check backup exists backup_files = os.listdir(evaluation_manager.backup_dir) assert len(backup_files) == 1 assert backup_files[0].startswith(eval_id) def test_run_evaluation(evaluation_manager): """Test full evaluation run.""" request = EvaluationRequest( model_id="hf-internal-testing/tiny-random-gpt2", revision="main", precision="float16", weight_type="Safetensors", submitted_time=datetime.now() ) results = evaluation_manager.run_evaluation(request) assert results["model_id"] == request.model_id assert results["revision"] == request.revision assert "security_score" in results assert "safetensors_compliant" in results def test_evaluation_error_handling(evaluation_manager): """Test error handling during evaluation.""" request = EvaluationRequest( model_id="invalid/model", revision="main", precision="float16", weight_type="Safetensors", submitted_time=datetime.now() ) with pytest.raises(Exception): evaluation_manager.run_evaluation(request) def test_concurrent_evaluations(evaluation_manager, tmp_path): """Test handling of concurrent evaluations.""" import threading import time def run_eval(model_id): request = EvaluationRequest( model_id=model_id, revision="main", precision="float16", weight_type="Safetensors", submitted_time=datetime.now() ) try: evaluation_manager.run_evaluation(request) except Exception: pass # Start multiple evaluation threads threads = [] for i in range(3): thread = threading.Thread( target=run_eval, args=(f"model_{i}",) ) threads.append(thread) thread.start() # Wait for all threads to complete for thread in threads: thread.join() # Check results directory integrity assert os.path.exists(evaluation_manager.results_dir) assert os.path.exists(evaluation_manager.backup_dir)