import torch
from sklearn.metrics import accuracy_score  # Example metric

# Load your hidden test set (adjust paths and format to your data)
TEST_DATA_PATH = "test_data.pt"  # Replace with the actual path
TEST_LABELS_PATH = "test_labels.pt"

test_data = torch.load(TEST_DATA_PATH)
test_labels = torch.load(TEST_LABELS_PATH)


# Evaluation script entry point
def evaluate_submission(model_checkpoint_path: str):
    """
    Evaluates the submitted model on the hidden test set.

    Args:
        model_checkpoint_path (str): Path to the submitted model checkpoint.

    Returns:
        dict: A dictionary containing the evaluation metrics.
    """
    # Pick the device first so the checkpoint can be mapped onto it directly
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the participant's model. The checkpoint must contain a full
    # serialized nn.Module (not just a state_dict). On PyTorch >= 2.6,
    # torch.load defaults to weights_only=True, which rejects pickled
    # modules, so weights_only=False is passed explicitly here.
    model = torch.load(model_checkpoint_path, map_location=device,
                       weights_only=False)
    model.eval()
    model = model.to(device)

    test_data_tensor = test_data.to(device)

    # Perform inference without tracking gradients
    with torch.no_grad():
        logits = model(test_data_tensor)
        predictions = torch.argmax(logits, dim=1).cpu().numpy()

    # Calculate evaluation metric (e.g., accuracy); convert labels to
    # numpy so sklearn receives plain arrays
    accuracy = accuracy_score(test_labels.cpu().numpy(), predictions)

    return {"accuracy": accuracy}  # Add other metrics as needed


if __name__ == "__main__":
    # For local testing, pass a sample model checkpoint path here
    sample_model_path = "sample_submission.pt"  # Replace with a test checkpoint
    result = evaluate_submission(sample_model_path)
    print(result)
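
# --- Optional: batched inference (a minimal sketch, not part of the harness
# above). evaluate_submission pushes the entire test set through the model in
# a single forward pass, which can exhaust GPU memory on large test sets. The
# helper below batches inference with PyTorch's DataLoader/TensorDataset;
# predict_in_batches is a hypothetical name and batch_size=256 is an assumed
# default to tune for your hardware.
from torch.utils.data import DataLoader, TensorDataset


def predict_in_batches(model, data, device, batch_size=256):
    """Run inference in fixed-size batches to bound peak GPU memory."""
    loader = DataLoader(TensorDataset(data), batch_size=batch_size)
    batch_preds = []
    with torch.no_grad():
        # TensorDataset yields 1-tuples, hence the (batch,) unpacking
        for (batch,) in loader:
            logits = model(batch.to(device))
            batch_preds.append(torch.argmax(logits, dim=1).cpu())
    return torch.cat(batch_preds).numpy()

# Usage inside evaluate_submission, replacing the single forward pass:
#     predictions = predict_in_batches(model, test_data, device)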