# Hugging Face Spaces page header (scrape residue): Space status was "Build error".
from loguru import logger | |
from typing_extensions import Annotated | |
from clearml import PipelineDecorator | |
from llm_engineering.domain.dataset import InstructTrainTestSplit, PreferenceTrainTestSplit | |
from llm_engineering.settings import settings | |
def push_to_huggingface(
    dataset: Annotated[InstructTrainTestSplit | PreferenceTrainTestSplit, "dataset_split"],
    dataset_id: Annotated[str, "dataset_id"],
) -> None:
    """Convert a train/test split and push it to the Hugging Face Hub.

    The split is converted with ``dataset.to_huggingface(flatten=True)`` and
    the result is uploaded via ``push_to_hub`` under ``dataset_id``, using
    ``settings.HUGGINGFACE_ACCESS_TOKEN`` as the token.

    Args:
        dataset: The split object to upload; it must provide
            ``to_huggingface(flatten=True)``.
        dataset_id: Identifier passed to ``push_to_hub`` as the target
            dataset id.

    Raises:
        ValueError: If ``dataset_id`` is ``None`` or empty, or if
            ``settings.HUGGINGFACE_ACCESS_TOKEN`` is ``None``.
    """
    # Explicit raises instead of ``assert``: assertions are removed when
    # Python runs with -O, which would let invalid input reach the upload.
    if not dataset_id:
        raise ValueError("Dataset id must be provided for pushing to Huggingface")

    # Read the token once so the value that is validated is the value used.
    access_token = settings.HUGGINGFACE_ACCESS_TOKEN
    if access_token is None:
        raise ValueError("Huggingface access token must be provided for pushing to Huggingface")

    logger.info(f"Pushing dataset {dataset_id} to Hugging Face.")

    huggingface_dataset = dataset.to_huggingface(flatten=True)
    huggingface_dataset.push_to_hub(dataset_id, token=access_token)