# handwriting-ocr / clear_dataset.py
# Raymond Weitekamp
# feat: add anonymous submission option for public dataset
# 8388b94
# raw
# history blame contribute delete
# 835 Bytes
from datasets import load_dataset
from huggingface_hub import HfApi
import os
# Load environment variables
from dotenv import load_dotenv
load_dotenv()

# Get the Hugging Face token from the PUBLIC_DATASET_TOKEN environment variable.
token = os.getenv('PUBLIC_DATASET_TOKEN')
if not token:
    # Fail fast: an unset or empty token is rejected before any Hub call is made.
    raise ValueError('PUBLIC_DATASET_TOKEN not found in environment')
# Initialize the Hugging Face API client, authenticated with the token read above.
# NOTE(review): `api` is not referenced again in the visible script (the push
# at the end passes `token` directly) — confirm whether it is still needed.
api = HfApi(token=token)
# Declared schema of the dataset: column name -> type name.
features = {
    'text': 'string',
    'image': 'image',
    'timestamp': 'string',
    'user': 'string',
}

# Create the empty dataset with that schema applied explicitly.
from datasets import Dataset, Features, Image, Value

# Empty columns carry no type information of their own: without an explicit
# `features=` argument they are inferred as null-typed columns, and the schema
# declared above would never reach the pushed dataset.
schema = Features({
    column: Image() if type_name == 'image' else Value(type_name)
    for column, type_name in features.items()
})

# One empty list per declared column, so the dataset has zero rows.
empty_dataset = Dataset.from_dict(
    {column: [] for column in features},
    features=schema,
)
# Upload the zero-row dataset to the public Hub repository, authenticating
# with the token read from the environment.
# NOTE(review): presumably this is what clears the published data — confirm
# that the push replaces the existing files for this repo.
empty_dataset.push_to_hub(
    repo_id='rawwerks/handwriting-ocr-all',
    private=False,
    token=token,
)