import os

from datasets import Dataset
from huggingface_hub import HfApi, login
|
|
|
|
|
# Seed record for the results dataset: one placeholder row, expressed in the
# column-oriented form Dataset.from_dict() takes (column name -> list of values).
# Every list holds exactly one element so all columns have the same length.
initial_data = {
    "model": ["example/model"],
    "model_raw": ["example/model"],
    "base_model": ["gpt2"],
    "revision": ["main"],
    "precision": ["fp16"],
    "weight_type": ["Safetensors"],
    "model_type": ["Pretrained"],
    "status": ["PENDING"],
    "timestamp": ["2025-01-26T15:15:09.693973"],
    "security_score": [0.5],
    "safetensors_compliant": [True],
    "hub_license": ["MIT"],
    "hub_likes": [0],
    "params_billion": [0.5],
    "available_on_hub": [True],
    "model_sha": ["abc123"],
}
|
|
|
|
|
# Build a one-row dataset from the seed record.
dataset = Dataset.from_dict(initial_data)

# Authenticate with the Hub. When the HF_TOKEN environment variable is set it
# is passed to login() so the script can run unattended (CI, cron). `or None`
# maps an empty value to None, in which case login() prompts interactively
# exactly as the bare login() call did.
login(token=os.environ.get("HF_TOKEN") or None)

# Publish the seed row to the results dataset repository.
dataset.push_to_hub("stacklok/results")
|
|
|
|
|
# Dataset card (README.md) for the results repository.
# The backslash after the opening quotes suppresses the leading newline so the
# YAML front matter delimiter `---` is the very first line of the card.
dataset_card = """\
---
language:
- en
license:
- mit
---

# Dataset Card for stacklok/results

This dataset contains evaluation results for various models, focusing on security scores and other relevant metrics.

## Dataset Structure

The dataset contains the following fields:
- `model`: The identifier of the model
- `model_raw`: The raw model identifier
- `base_model`: The base model if applicable
- `revision`: The revision or version of the model
- `precision`: The precision used for the model (e.g., fp16, fp32)
- `weight_type`: Type of weights used
- `model_type`: Type of the model
- `status`: Current status of the evaluation
- `timestamp`: When the evaluation was performed
- `security_score`: A score representing the model's security evaluation
- `safetensors_compliant`: A boolean indicating whether the model is compliant with safetensors
- `hub_license`: The license of the model on Hugging Face Hub
- `hub_likes`: Number of likes on Hugging Face Hub
- `params_billion`: Number of parameters in billions
- `available_on_hub`: Whether the model is available on Hugging Face Hub
- `model_sha`: SHA hash of the model

## Usage

This dataset is used to populate the secure code leaderboard, providing insights into the security aspects of various models.
"""
|
|
|
|
|
# Upload the card straight from memory as README.md of the dataset repository.
# Passing bytes instead of a local path avoids writing a README.md into the
# current working directory (which would overwrite any README already there)
# and pins the encoding to UTF-8 rather than the platform default.
api = HfApi()
api.upload_file(
    path_or_fileobj=dataset_card.encode("utf-8"),
    path_in_repo="README.md",
    repo_id="stacklok/results",
    repo_type="dataset",
)

print("Dataset initialized and card uploaded successfully!")