"""Initialize the ``stacklok/results`` dataset on the Hugging Face Hub.

Seeds the dataset with a single sample evaluation entry (defining the
schema), pushes it to the Hub, then writes and uploads a README.md
dataset card describing the fields.
"""

import os

from datasets import Dataset
from huggingface_hub import HfApi, login

# Single sample entry — one value per column — used to establish the
# dataset schema on first push.
initial_data = {
    "model": ["example/model"],
    "model_raw": ["example/model"],
    "base_model": ["gpt2"],
    "revision": ["main"],
    "precision": ["fp16"],
    "weight_type": ["Safetensors"],
    "model_type": ["Pretrained"],
    "status": ["PENDING"],
    "timestamp": ["2025-01-26T15:15:09.693973"],
    "security_score": [0.5],
    "safetensors_compliant": [True],
    "hub_license": ["MIT"],
    "hub_likes": [0],
    "params_billion": [0.5],
    "available_on_hub": [True],
    "model_sha": ["abc123"],
}

# Create a Dataset object from the column-oriented dict.
dataset = Dataset.from_dict(initial_data)

# Authenticate with the Hub. Pass the HUGGINGFACE_TOKEN environment
# variable explicitly so the script can run non-interactively; if it is
# unset, login() falls back to an interactive prompt.
login(token=os.environ.get("HUGGINGFACE_TOKEN"))

# Push the dataset to the Hugging Face Hub.
dataset.push_to_hub("stacklok/results")

# Dataset card. NOTE: the YAML front matter ("---" block) must start on
# the very first line of README.md — a leading blank line breaks the
# Hub's metadata parsing.
dataset_card = """---
language:
- en
license:
- mit
---

# Dataset Card for stacklok/results

This dataset contains evaluation results for various models, focusing on security scores and other relevant metrics.

## Dataset Structure

The dataset contains the following fields:

- `model`: The identifier of the model
- `model_raw`: The raw model identifier
- `base_model`: The base model if applicable
- `revision`: The revision or version of the model
- `precision`: The precision used for the model (e.g., fp16, fp32)
- `weight_type`: Type of weights used
- `model_type`: Type of the model
- `status`: Current status of the evaluation
- `timestamp`: When the evaluation was performed
- `security_score`: A score representing the model's security evaluation
- `safetensors_compliant`: A boolean indicating whether the model is compliant with safetensors
- `hub_license`: The license of the model on Hugging Face Hub
- `hub_likes`: Number of likes on Hugging Face Hub
- `params_billion`: Number of parameters in billions
- `available_on_hub`: Whether the model is available on Hugging Face Hub
- `model_sha`: SHA hash of the model

## Usage

This dataset is used to populate the secure code leaderboard, providing insights into the security aspects of various models.
"""

# Write the dataset card locally; explicit UTF-8 so output does not
# depend on the platform's default encoding.
with open("README.md", "w", encoding="utf-8") as f:
    f.write(dataset_card)

# Upload the dataset card to the dataset repository on the Hub.
api = HfApi()
api.upload_file(
    path_or_fileobj="README.md",
    path_in_repo="README.md",
    repo_id="stacklok/results",
    repo_type="dataset",
)

print("Dataset initialized and card uploaded successfully!")