"""Initialize the ``stacklok/results`` dataset on the Hugging Face Hub.

Seeds the dataset with a single sample evaluation entry (defining the
schema), pushes it to the Hub, then writes and uploads a README.md
dataset card describing the fields.
"""

import os

from datasets import Dataset
from huggingface_hub import HfApi, login

# Single sample entry — one value per column — used to establish the
# dataset schema on first push.
initial_data = {
    "model": ["example/model"],
    "model_raw": ["example/model"],
    "base_model": ["gpt2"],
    "revision": ["main"],
    "precision": ["fp16"],
    "weight_type": ["Safetensors"],
    "model_type": ["Pretrained"],
    "status": ["PENDING"],
    "timestamp": ["2025-01-26T15:15:09.693973"],
    "security_score": [0.5],
    "safetensors_compliant": [True],
    "hub_license": ["MIT"],
    "hub_likes": [0],
    "params_billion": [0.5],
    "available_on_hub": [True],
    "model_sha": ["abc123"],
}

# Create a Dataset object from the column-oriented dict.
dataset = Dataset.from_dict(initial_data)

# Authenticate with the Hub. Pass the HUGGINGFACE_TOKEN environment
# variable explicitly so the script can run non-interactively; if it is
# unset, login() falls back to an interactive prompt.
login(token=os.environ.get("HUGGINGFACE_TOKEN"))

# Push the dataset to the Hugging Face Hub.
dataset.push_to_hub("stacklok/results")

# Dataset card. NOTE: the YAML front matter ("---" block) must start on
# the very first line of README.md — a leading blank line breaks the
# Hub's metadata parsing.
dataset_card = """---
language:
- en
license:
- mit
---

# Dataset Card for stacklok/results

This dataset contains evaluation results for various models, focusing on security scores and other relevant metrics.

## Dataset Structure

The dataset contains the following fields:

- `model`: The identifier of the model
- `model_raw`: The raw model identifier
- `base_model`: The base model if applicable
- `revision`: The revision or version of the model
- `precision`: The precision used for the model (e.g., fp16, fp32)
- `weight_type`: Type of weights used
- `model_type`: Type of the model
- `status`: Current status of the evaluation
- `timestamp`: When the evaluation was performed
- `security_score`: A score representing the model's security evaluation
- `safetensors_compliant`: A boolean indicating whether the model is compliant with safetensors
- `hub_license`: The license of the model on Hugging Face Hub
- `hub_likes`: Number of likes on Hugging Face Hub
- `params_billion`: Number of parameters in billions
- `available_on_hub`: Whether the model is available on Hugging Face Hub
- `model_sha`: SHA hash of the model

## Usage

This dataset is used to populate the secure code leaderboard, providing insights into the security aspects of various models.
"""

# Write the dataset card locally; explicit UTF-8 so output does not
# depend on the platform's default encoding.
with open("README.md", "w", encoding="utf-8") as f:
    f.write(dataset_card)

# Upload the dataset card to the dataset repository on the Hub.
api = HfApi()
api.upload_file(
    path_or_fileobj="README.md",
    path_in_repo="README.md",
    repo_id="stacklok/results",
    repo_type="dataset",
)

print("Dataset initialized and card uploaded successfully!")