from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    """One leaderboard evaluation: results key, metric name, and display column."""

    benchmark: str  # key identifying the benchmark in the results files
    metric: str  # name of the metric reported for this benchmark
    col_name: str  # column header shown on the leaderboard UI


# Custom tasks for security evaluation
# ---------------------------------------------------
class Tasks(Enum):
    """Closed set of evaluations displayed on the leaderboard."""

    # Safetensors check
    safetensors = Task("safetensors_check", "compliant", "Safetensors")
    # Security prompts evaluation
    secure_coding = Task("secure_coding", "security_score", "Security Score ⬆️")


NUM_FEWSHOT = 0  # few-shot examples used during evaluation
# ---------------------------------------------------


# Your leaderboard name
TITLE = """

Security-Focused LLM Leaderboard

"""

# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
This leaderboard evaluates language models based on two key security aspects:

1. **Safetensors Compliance**: Checks if models use the safer safetensors format for weight storage
2. **Secure Coding Evaluation**: Tests models against a series of security-focused prompts to assess their ability to generate secure code and provide security-aware responses
"""

# Which evaluations are you running? how can people reproduce what you have?
# NOTE: plain string — the original f-string prefix had no placeholders and would
# break on any literal braces added to the markdown later.
LLM_BENCHMARKS_TEXT = """
## How it works

### Safetensors Check
Models are evaluated for their use of the safetensors format, which provides:
- Memory safety
- Faster loading times
- Better security guarantees

### Secure Coding Evaluation
Models are tested against a comprehensive suite of security-focused prompts that assess:
- Secure coding practices
- Security vulnerability awareness
- Input validation handling
- Security best practices knowledge

## Reproducibility
To reproduce these results, you can run:

```python
# Code for running security evaluations will be provided here
```
"""

EVALUATION_QUEUE_TEXT = """
## Requirements for Model Submission

### 1) Safetensors Format
Your model should use the safetensors format. To convert your model:

```python
from transformers import AutoModelForCausalLM
from safetensors.torch import save_file

model = AutoModelForCausalLM.from_pretrained("your-model")
state_dict = model.state_dict()
save_file(state_dict, "model.safetensors")
```

### 2) Model Loading Requirements
Ensure your model can be loaded using standard AutoClasses:

```python
from transformers import AutoConfig, AutoModel, AutoTokenizer
config = AutoConfig.from_pretrained("your model name", revision=revision)
model = AutoModel.from_pretrained("your model name", revision=revision)
tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
```

### 3) License Requirements
Ensure your model has an open license to promote transparency and community access.

### 4) Model Card Documentation
Please include in your model card:
- Security considerations
- Known limitations
- Intended use cases
- Any security-specific features or capabilities

## Troubleshooting Failed Submissions
If your model appears in the FAILED category:
1. Verify safetensors format conversion
2. Check model loading with AutoClasses
3. Ensure all security prompts can be processed by your model
4. Verify token limits and model capabilities
"""

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""
@misc{security-llm-leaderboard,
    title={Security-Focused LLM Leaderboard},
    year={2024},
    note={Online resource for evaluating LLM security aspects}
}
"""