secure_code_leaderboard_archived / init_huggingface_dataset.py
lukehinds's picture
Align datatypes
6a8f38b
raw
history blame
2.52 kB
from datasets import Dataset
from huggingface_hub import HfApi, login
import os
# Seed record (one row) used to bootstrap the dataset repo. Each column is a
# single-element list so that Dataset.from_dict can infer the column dtype
# from the Python value — keep the example values type-representative.
initial_data = dict(
    model=["example/model"],
    model_raw=["example/model"],
    base_model=["gpt2"],
    revision=["main"],
    precision=["fp16"],
    weight_type=["Safetensors"],
    model_type=["Pretrained"],
    status=["PENDING"],
    timestamp=["2025-01-26T15:15:09.693973"],
    security_score=[0.5],
    safetensors_compliant=[True],
    hub_license=["MIT"],
    hub_likes=[0],
    params_billion=[0.5],
    available_on_hub=[True],
    model_sha=["abc123"],
)
# Build a one-row in-memory Dataset; column types are inferred from the
# Python values in `initial_data`.
dataset = Dataset.from_dict(initial_data)
# Authenticate with the Hugging Face Hub. login() with no arguments uses a
# cached token or prompts interactively. NOTE(review): the original comment
# claimed a HUGGINGFACE_TOKEN env var is required, but huggingface_hub reads
# HF_TOKEN — confirm which variable the deployment actually sets.
login()
# Create/overwrite the data files of the "stacklok/results" dataset repo.
dataset.push_to_hub("stacklok/results")
# README.md content for the stacklok/results dataset repo. The YAML front
# matter (language, license) is parsed by the Hub; keep the field list below
# in sync with the columns of `initial_data` above.
dataset_card = """
---
language:
- en
license:
- mit
---
# Dataset Card for stacklok/results
This dataset contains evaluation results for various models, focusing on security scores and other relevant metrics.
## Dataset Structure
The dataset contains the following fields:
- `model`: The identifier of the model
- `model_raw`: The raw model identifier
- `base_model`: The base model if applicable
- `revision`: The revision or version of the model
- `precision`: The precision used for the model (e.g., fp16, fp32)
- `weight_type`: Type of weights used
- `model_type`: Type of the model
- `status`: Current status of the evaluation
- `timestamp`: When the evaluation was performed
- `security_score`: A score representing the model's security evaluation
- `safetensors_compliant`: A boolean indicating whether the model is compliant with safetensors
- `hub_license`: The license of the model on Hugging Face Hub
- `hub_likes`: Number of likes on Hugging Face Hub
- `params_billion`: Number of parameters in billions
- `available_on_hub`: Whether the model is available on Hugging Face Hub
- `model_sha`: SHA hash of the model
## Usage
This dataset is used to populate the secure code leaderboard, providing insights into the security aspects of various models.
"""
# Write the card locally before uploading. Explicit UTF-8 avoids the
# locale-dependent default encoding, and newline="\n" disables platform
# newline translation so the uploaded README bytes are identical everywhere.
with open("README.md", "w", encoding="utf-8", newline="\n") as f:
    f.write(dataset_card)
# Upload the freshly written card so the Hub renders it as the dataset's
# README page.
api = HfApi()
api.upload_file(
    path_or_fileobj="README.md",
    path_in_repo="README.md",
    repo_id="stacklok/results",
    repo_type="dataset"  # target the dataset repo, not a model repo
)
print("Dataset initialized and card uploaded successfully!")