# Provenance (file-viewer residue converted to a comment so the module parses):
# Hugging Face Space file, commit b257b3e — "Fix 'AutoEvalColumn' has no attribute"
# (author: lukehinds, 3.39 kB)
from dataclasses import dataclass
from enum import Enum
@dataclass
class Task:
    """Describes one benchmark column shown on the leaderboard.

    Instances are used as the values of the ``Tasks`` enum below.
    """

    benchmark: str  # internal benchmark identifier (key in the results files)
    metric: str     # name of the metric reported for this benchmark
    col_name: str   # human-readable column header displayed in the UI
# Custom tasks for security evaluation
# ---------------------------------------------------
class Tasks(Enum):
    """Closed set of evaluation tasks run by this leaderboard.

    Each member's value is a :class:`Task` giving the benchmark key, the
    metric to read from results, and the display column name.
    """

    # Safetensors check: whether the model ships weights in safetensors format
    safetensors = Task("safetensors_check", "compliant", "Safetensors")
    # Security prompts evaluation: aggregate score on secure-coding prompts
    secure_coding = Task("secure_coding", "security_score", "Security Score ⬆️")
# Number of few-shot examples used during evaluation (0 = zero-shot).
NUM_FEWSHOT = 0
# ---------------------------------------------------
# Your leaderboard name (rendered as HTML by the UI)
TITLE = """<h1 align="center" id="space-title">Security-Focused LLM Leaderboard</h1>"""
# What does your leaderboard evaluate? (markdown shown at the top of the page)
INTRODUCTION_TEXT = """
This leaderboard evaluates language models based on two key security aspects:
1. **Safetensors Compliance**: Checks if models use the safer safetensors format for weight storage
2. **Secure Coding Evaluation**: Tests models against a series of security-focused prompts to assess their ability to generate secure code and provide security-aware responses
"""
# Which evaluations are you running? how can people reproduce what you have?
# Markdown shown in the "About" tab describing the benchmarks.
# NOTE: plain string, not an f-string — the original `f` prefix had no
# placeholders and would raise at import time if literal braces were ever
# added to the markdown (e.g. in a code sample).
LLM_BENCHMARKS_TEXT = """
## How it works
### Safetensors Check
Models are evaluated for their use of the safetensors format, which provides:
- Memory safety
- Faster loading times
- Better security guarantees
### Secure Coding Evaluation
Models are tested against a comprehensive suite of security-focused prompts that assess:
- Secure coding practices
- Security vulnerability awareness
- Input validation handling
- Security best practices knowledge
## Reproducibility
To reproduce these results, you can run:
```python
# Code for running security evaluations will be provided here
```
"""
# Markdown shown on the submission tab: requirements and troubleshooting
# guidance for models entering the evaluation queue.
EVALUATION_QUEUE_TEXT = """
## Requirements for Model Submission
### 1) Safetensors Format
Your model should use the safetensors format. To convert your model:
```python
from transformers import AutoModelForCausalLM
from safetensors.torch import save_file
model = AutoModelForCausalLM.from_pretrained("your-model")
state_dict = model.state_dict()
save_file(state_dict, "model.safetensors")
```
### 2) Model Loading Requirements
Ensure your model can be loaded using standard AutoClasses:
```python
from transformers import AutoConfig, AutoModel, AutoTokenizer
config = AutoConfig.from_pretrained("your model name", revision=revision)
model = AutoModel.from_pretrained("your model name", revision=revision)
tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
```
### 3) License Requirements
Ensure your model has an open license to promote transparency and community access.
### 4) Model Card Documentation
Please include in your model card:
- Security considerations
- Known limitations
- Intended use cases
- Any security-specific features or capabilities
## Troubleshooting Failed Submissions
If your model appears in the FAILED category:
1. Verify safetensors format conversion
2. Check model loading with AutoClasses
3. Ensure all security prompts can be processed by your model
4. Verify token limits and model capabilities
"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""
@misc{security-llm-leaderboard,
title={Security-Focused LLM Leaderboard},
year={2024},
note={Online resource for evaluating LLM security aspects}
}
"""