secure_code_leaderboard_archived / init_huggingface_dataset.py
lukehinds's picture
Align datatypes
6a8f38b
raw
history blame
2.52 kB
from datasets import Dataset
from huggingface_hub import HfApi, login
import os
# Seed record (one row) used to bootstrap the dataset repo. Each column is a
# single-element list so that Dataset.from_dict can infer the column dtype
# from the Python value — keep the example values type-representative.
initial_data = dict(
    model=["example/model"],
    model_raw=["example/model"],
    base_model=["gpt2"],
    revision=["main"],
    precision=["fp16"],
    weight_type=["Safetensors"],
    model_type=["Pretrained"],
    status=["PENDING"],
    timestamp=["2025-01-26T15:15:09.693973"],
    security_score=[0.5],
    safetensors_compliant=[True],
    hub_license=["MIT"],
    hub_likes=[0],
    params_billion=[0.5],
    available_on_hub=[True],
    model_sha=["abc123"],
)
# Build a one-row in-memory Dataset; column types are inferred from the
# Python values in `initial_data`.
dataset = Dataset.from_dict(initial_data)
# Authenticate with the Hugging Face Hub. login() with no arguments uses a
# cached token or prompts interactively. NOTE(review): the original comment
# claimed a HUGGINGFACE_TOKEN env var is required, but huggingface_hub reads
# HF_TOKEN — confirm which variable the deployment actually sets.
login()
# Create/overwrite the data files of the "stacklok/results" dataset repo.
dataset.push_to_hub("stacklok/results")
# README.md content for the stacklok/results dataset repo. The YAML front
# matter (language, license) is parsed by the Hub; keep the field list below
# in sync with the columns of `initial_data` above.
dataset_card = """
---
language:
- en
license:
- mit
---
# Dataset Card for stacklok/results
This dataset contains evaluation results for various models, focusing on security scores and other relevant metrics.
## Dataset Structure
The dataset contains the following fields:
- `model`: The identifier of the model
- `model_raw`: The raw model identifier
- `base_model`: The base model if applicable
- `revision`: The revision or version of the model
- `precision`: The precision used for the model (e.g., fp16, fp32)
- `weight_type`: Type of weights used
- `model_type`: Type of the model
- `status`: Current status of the evaluation
- `timestamp`: When the evaluation was performed
- `security_score`: A score representing the model's security evaluation
- `safetensors_compliant`: A boolean indicating whether the model is compliant with safetensors
- `hub_license`: The license of the model on Hugging Face Hub
- `hub_likes`: Number of likes on Hugging Face Hub
- `params_billion`: Number of parameters in billions
- `available_on_hub`: Whether the model is available on Hugging Face Hub
- `model_sha`: SHA hash of the model
## Usage
This dataset is used to populate the secure code leaderboard, providing insights into the security aspects of various models.
"""
# Write the card locally before uploading. Explicit UTF-8 avoids the
# locale-dependent default encoding, and newline="\n" disables platform
# newline translation so the uploaded README bytes are identical everywhere.
with open("README.md", "w", encoding="utf-8", newline="\n") as f:
    f.write(dataset_card)
# Upload the freshly written card so the Hub renders it as the dataset's
# README page.
api = HfApi()
api.upload_file(
    path_or_fileobj="README.md",
    path_in_repo="README.md",
    repo_id="stacklok/results",
    repo_type="dataset"  # target the dataset repo, not a model repo
)
print("Dataset initialized and card uploaded successfully!")