from datasets import load_dataset
from huggingface_hub import HfApi
import os

# Run python-dotenv's loader before the environment is consulted below
from dotenv import load_dotenv
load_dotenv()

# The Hub access token comes from PUBLIC_DATASET_TOKEN; abort when it is
# unset or empty so the failure is reported here rather than at upload time
token = os.environ.get('PUBLIC_DATASET_TOKEN')
if token is None or token == '':
    raise ValueError('PUBLIC_DATASET_TOKEN not found in environment')

# Hugging Face API client constructed with that token
api = HfApi(token=token)

# Build and publish an empty dataset that carries an explicit schema.
# NOTE(review): "same schema" presumably refers to an existing handwriting-OCR
# dataset with these four columns -- confirm against that dataset.
from datasets import Dataset, Features, Image, Value

# Column types must be declared explicitly: with zero rows there is no data to
# infer them from, so without `features=` the columns would not get the
# string/image types intended here.
features = Features({
    'text': Value('string'),
    'image': Image(),
    'timestamp': Value('string'),
    'user': Value('string'),
})

# One empty list per declared column, typed by `features`.
empty_dataset = Dataset.from_dict(
    {column: [] for column in features},
    features=features,
)

# Push the empty dataset to the Hub as a public repository.
empty_dataset.push_to_hub('rawwerks/handwriting-ocr-all', private=False, token=token)